intextus-embed 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
intextus/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .encoder import IntextusEncoder
2
+ from .utils import compute_maxsim
3
+
4
+ __all__ = ["IntextusEncoder", "compute_maxsim"]
intextus/encoder.py ADDED
@@ -0,0 +1,205 @@
1
+ import os
2
+ from typing import List, Union, Dict, Any
3
+ import numpy as np
4
+ import onnxruntime as ort
5
+ from tokenizers import Tokenizer
6
+ from intextus.utils import get_punctuation_token_ids
7
+
8
class IntextusEncoder:
    """ColBERT-style multi-vector text encoder backed by ONNX Runtime.

    Text is tokenized with a ``tokenizers.Tokenizer``, an optional query or
    document marker token is inserted right after the first token, the ONNX
    graph is executed, and one embedding per token position is returned.
    """

    def __init__(
        self,
        model_name_or_path: str = "intextus/mxbai-edge-colbert-v0-17m-onnx",
        tokenizer_path: Union[str, None] = None,
        query_marker: str = "[Q]",
        doc_marker: str = "[D]",
        do_lower_case: bool = True,
        provider: str = "CPUExecutionProvider"
    ):
        """
        Pure ONNX engine for generic ColBERT execution.

        Args:
            model_name_or_path: Local path to a directory, an ONNX file, or a Hugging Face Hub model ID/alias.
            tokenizer_path: Optional path to tokenizer.json. If None, it is resolved automatically
                (next to the model file, or downloaded from the same Hub repository).
            query_marker: Special marker string used to denote query sequence.
            doc_marker: Special marker string used to denote document sequence.
            do_lower_case: Whether input texts are lowercased before tokenization.
            provider: Execution provider for ONNX Runtime inference.

        Raises:
            ValueError: If the model is not a local path and cannot be fetched from the Hub.
            FileNotFoundError: If the resolved model or tokenizer file does not exist.
        """
        model_path = None

        if os.path.exists(model_name_or_path):
            # Local path: either a directory holding model.onnx, or the ONNX file itself.
            if os.path.isdir(model_name_or_path):
                model_dir = model_name_or_path
                model_path = os.path.join(model_dir, "model.onnx")
            else:
                model_dir = os.path.dirname(model_name_or_path)
                model_path = model_name_or_path
            if tokenizer_path is None:
                tokenizer_path = os.path.join(model_dir, "tokenizer.json")
        else:
            # Not a local path: treat it as a Hub repository ID, expanding known aliases.
            supported_mappings = {
                "mxbai-edge-colbert-v0-17m": "intextus/mxbai-edge-colbert-v0-17m-onnx",
                "mxbai-edge-colbert-v0-32m": "intextus/mxbai-edge-colbert-v0-32m-onnx",
                "lateon": "intextus/lateon-onnx"
            }
            repo_id = supported_mappings.get(model_name_or_path, model_name_or_path)

            try:
                # Imported lazily so huggingface_hub is only needed for Hub downloads.
                from huggingface_hub import hf_hub_download
                print(f"Downloading model file from Hugging Face repository '{repo_id}'...")
                model_path = hf_hub_download(repo_id=repo_id, filename="model.onnx")
                if tokenizer_path is None:
                    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
            except Exception as e:
                # Chain the cause so the original traceback is preserved for debugging.
                raise ValueError(
                    f"Could not load model '{model_name_or_path}' from local path or Hugging Face Hub.\n"
                    f"Underlying error: {e}"
                ) from e

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"ONNX model file not found at {model_path}")

        if tokenizer_path is None or not os.path.exists(tokenizer_path):
            raise FileNotFoundError(f"Tokenizer file not found at {tokenizer_path}")

        self.tokenizer = Tokenizer.from_file(tokenizer_path)
        self.session = ort.InferenceSession(model_path, providers=[provider])
        self.do_lower_case = do_lower_case

        # Discover graph inputs/outputs from the session instead of hard-coding them.
        self.input_names = [i.name for i in self.session.get_inputs()]
        self.output_name = self.session.get_outputs()[0].name

        # Marker IDs; the marker text is updated when only the trailing-space
        # variant exists so the skiplist below protects the right vocabulary entry.
        self.query_marker_id, query_marker = self._resolve_marker(query_marker)
        self.doc_marker_id, doc_marker = self._resolve_marker(doc_marker)

        if self.query_marker_id is None or self.doc_marker_id is None:
            print(f"[Warning] Custom markers '{query_marker.strip()}'/'{doc_marker.strip()}' not found in vocabulary. Defaulting to standard tokenization.")

        # Token IDs of punctuation-only vocabulary entries, stored as an array
        # so documents can be masked with a single np.isin call.
        skiplist_set = get_punctuation_token_ids(
            vocab=self.tokenizer.get_vocab(),
            query_marker=query_marker,
            doc_marker=doc_marker
        )
        self.skiplist_arr = np.array(list(skiplist_set), dtype=np.int64)

    def _resolve_marker(self, marker: str):
        """Return ``(token_id, marker_text)`` for *marker*.

        Falls back to the marker followed by a single space when the bare
        marker is not in the vocabulary. ``token_id`` is None when neither
        variant exists, in which case ``marker_text`` is returned unchanged.
        """
        marker_id = self.tokenizer.token_to_id(marker)
        if marker_id is None:
            spaced = marker + " "
            marker_id = self.tokenizer.token_to_id(spaced)
            if marker_id is not None:
                marker = spaced
        return marker_id, marker

    @staticmethod
    def _l2_normalize(embeddings: np.ndarray) -> np.ndarray:
        """L2-normalize *embeddings* in place along the last axis.

        All-zero vectors are left untouched so they do not produce NaNs.
        """
        norm = np.linalg.norm(embeddings, axis=-1, keepdims=True)
        np.divide(embeddings, norm, out=embeddings, where=norm != 0.0)
        return embeddings

    def _prepare_inputs(self, texts: List[str], marker_id: int, max_length: int) -> Dict[str, np.ndarray]:
        """Tokenize *texts* and build the ONNX feed dictionary.

        Args:
            texts: Texts to encode.
            marker_id: Token ID inserted at index 1 of every sequence, or None for no marker.
            max_length: Target sequence length, marker included.

        Returns:
            A dict with int64 ``input_ids`` and ``attention_mask`` arrays, plus
            an all-zero ``token_type_ids`` array when the graph declares that input.
        """
        if self.do_lower_case:
            texts = [t.lower() for t in texts]

        # Reserve one slot for the marker so the final length is max_length.
        token_len = max_length - 1 if marker_id is not None else max_length

        # `length` alone selects fixed-length padding; the previous `style`
        # keyword is not part of the tokenizers API.
        # NOTE(review): no pad_id/pad_token is passed, so the tokenizers
        # library defaults apply - confirm they match the model's pad token.
        self.tokenizer.enable_padding(length=token_len)
        self.tokenizer.enable_truncation(max_length=token_len)

        input_ids = []
        attention_masks = []
        for enc in self.tokenizer.encode_batch(texts):
            ids = list(enc.ids)
            mask = list(enc.attention_mask)

            # Insert the marker right after the first token (index 1) and mark it as attended.
            if marker_id is not None and len(ids) > 1:
                ids.insert(1, marker_id)
                ids = ids[:max_length]
                mask.insert(1, 1)
                mask = mask[:max_length]

            input_ids.append(ids)
            attention_masks.append(mask)

        inputs = {
            "input_ids": np.array(input_ids, dtype=np.int64),
            "attention_mask": np.array(attention_masks, dtype=np.int64)
        }

        # Some exported graphs also declare a token_type_ids input.
        if "token_type_ids" in self.input_names:
            inputs["token_type_ids"] = np.zeros_like(inputs["input_ids"])

        return inputs

    def encode_queries(self, queries: Union[str, List[str]], max_length: int = 32, normalize: bool = True) -> np.ndarray:
        """
        Encodes query texts into multi-vector embeddings.

        Args:
            queries: A single query string or list of query strings.
            max_length: Maximum query sequence length (usually 32 for ColBERT).
            normalize: Whether to apply L2 normalization to the output vectors.

        Returns:
            A NumPy array of query embeddings with shape (Batch, Seq_Len, Dim).
        """
        if isinstance(queries, str):
            queries = [queries]
        onnx_inputs = self._prepare_inputs(queries, self.query_marker_id, max_length)
        embeddings = self.session.run([self.output_name], onnx_inputs)[0]

        if normalize:
            self._l2_normalize(embeddings)

        return embeddings

    def encode_docs(self, docs: Union[str, List[str]], max_length: int = 256, normalize: bool = True) -> np.ndarray:
        """
        Encodes document texts into multi-vector embeddings.

        Embeddings at punctuation-token positions and at padding positions
        (attention mask 0) are zeroed, so neither can contribute a positive
        similarity when the vectors are scored.

        Args:
            docs: A single document string or list of document strings.
            max_length: Maximum document sequence length (usually 256 for ColBERT).
            normalize: Whether to apply L2 normalization to the output vectors.

        Returns:
            A NumPy array of document embeddings with shape (Batch, Seq_Len, Dim).
        """
        if isinstance(docs, str):
            docs = [docs]
        onnx_inputs = self._prepare_inputs(docs, self.doc_marker_id, max_length)
        embeddings = self.session.run([self.output_name], onnx_inputs)[0]

        # Positions to discard: skiplisted punctuation IDs, plus padding.
        # Every sequence is padded to max_length, so without the second term
        # short documents would carry many meaningless padding vectors.
        is_punctuation = np.isin(onnx_inputs["input_ids"], self.skiplist_arr)
        is_padding = onnx_inputs["attention_mask"] == 0
        keep_mask = ~(is_punctuation | is_padding)

        # Broadcast the (Batch, Seq_Len) mask over the embedding dimension.
        embeddings *= keep_mask[:, :, np.newaxis]

        if normalize:
            self._l2_normalize(embeddings)

        return embeddings
intextus/export.py ADDED
@@ -0,0 +1,107 @@
1
+ import argparse
2
+ import sys
3
+
4
def main():
    """Command-line entry point: export a Hugging Face ColBERT checkpoint to ONNX.

    Loads the model and tokenizer named by ``--model``, wraps the model so the
    graph returns per-token embeddings (after the projection layer when one is
    found), writes the ONNX file to ``--output`` and the tokenizer definition
    to ``--tokenizer-output``. Exits with status 1 when PyTorch/Transformers
    are missing or the tokenizer cannot be saved as ``tokenizer.json``.
    """
    import inspect

    parser = argparse.ArgumentParser(description="Export a PyTorch ColBERT model to ONNX for intextus.")
    parser.add_argument("--model", type=str, required=True, help="Hugging Face model ID or path to local PyTorch ColBERT model.")
    parser.add_argument("--output", type=str, default="model.onnx", help="Path to save the output ONNX model.")
    parser.add_argument("--tokenizer-output", type=str, default="tokenizer.json", help="Path to save the tokenizer.json file.")

    args = parser.parse_args()

    # The export dependencies are optional, so import them lazily and fail
    # with an actionable message instead of a traceback.
    try:
        import torch
        from transformers import AutoModel, AutoTokenizer
    except ImportError:
        print("Error: PyTorch and Transformers are required for the export utility.")
        print("Please install them using: pip install torch transformers")
        sys.exit(1)

    print(f"Loading ColBERT model from '{args.model}'...")

    class ColBERTWrapper(torch.nn.Module):
        """Expose ``base_model`` (+ optional projection) as a per-token embedding graph."""

        def __init__(self, base_model, linear):
            super().__init__()
            self.base_model = base_model
            self.linear = linear

        def forward(self, input_ids, attention_mask, token_type_ids=None):
            model_kwargs = {"input_ids": input_ids, "attention_mask": attention_mask}
            if token_type_ids is not None:
                model_kwargs["token_type_ids"] = token_type_ids
            last_hidden_state = self.base_model(**model_kwargs).last_hidden_state

            # Without a projection layer the raw hidden states are the embeddings.
            if self.linear is None:
                return last_hidden_state
            return self.linear(last_hidden_state)

    tokenizer = AutoTokenizer.from_pretrained(args.model)

    # tokenizer.json can only be written from a fast (Rust-backed) tokenizer.
    # Check before the export so a failure does not leave a model file
    # without its tokenizer.
    backend_tokenizer = getattr(tokenizer, "backend_tokenizer", None)
    if backend_tokenizer is None:
        print(f"Error: the tokenizer for '{args.model}' is not a fast tokenizer, so tokenizer.json cannot be written.")
        sys.exit(1)

    model = AutoModel.from_pretrained(args.model, trust_remote_code=True)

    # Use the first projection attribute the loaded model exposes, if any.
    projection_attrs = ("linear", "projection", "proj")
    linear = next(
        (getattr(model, attr) for attr in projection_attrs if hasattr(model, attr)),
        None,
    )
    if linear is None:
        print(
            "[Warning] No projection layer ('linear', 'projection', 'proj') found on the loaded model; "
            "the exported graph will return raw last hidden states.",
            file=sys.stderr,
        )

    wrapper = ColBERTWrapper(model, linear)
    wrapper.eval()

    # Dummy inputs only fix dtypes for tracing; batch and sequence axes are dynamic.
    dummy_input_ids = torch.ones(1, 32, dtype=torch.long)
    dummy_attention_mask = torch.ones(1, 32, dtype=torch.long)
    dummy_token_type_ids = torch.zeros(1, 32, dtype=torch.long)

    batch_and_sequence = {0: "batch_size", 1: "sequence_length"}
    input_names = ["input_ids", "attention_mask"]
    dynamic_axes = {
        "input_ids": dict(batch_and_sequence),
        "attention_mask": dict(batch_and_sequence),
        "embeddings": dict(batch_and_sequence)
    }
    inputs = (dummy_input_ids, dummy_attention_mask)

    # Only export token_type_ids when the base model's forward() accepts it.
    if "token_type_ids" in inspect.signature(model.forward).parameters:
        inputs = (dummy_input_ids, dummy_attention_mask, dummy_token_type_ids)
        input_names.append("token_type_ids")
        dynamic_axes["token_type_ids"] = dict(batch_and_sequence)

    print("Exporting model to ONNX...")
    # Gradients are never needed for an inference-only export.
    with torch.no_grad():
        torch.onnx.export(
            wrapper,
            inputs,
            args.output,
            input_names=input_names,
            output_names=["embeddings"],
            dynamic_axes=dynamic_axes,
            opset_version=14,
            do_constant_folding=True
        )

    print(f"ONNX model saved successfully to '{args.output}'")

    print(f"Saving tokenizer to '{args.tokenizer_output}'...")
    backend_tokenizer.save(args.tokenizer_output)
    print("Done!")


if __name__ == "__main__":
    main()
intextus/utils.py ADDED
@@ -0,0 +1,77 @@
1
+ import string
2
+ from typing import Dict, Set
3
+ import numpy as np
4
+
5
def compute_maxsim(query_embeddings: np.ndarray, doc_embeddings: np.ndarray) -> float:
    """
    Computes the late-interaction MaxSim score between query and document vectors.

    For every query token the highest dot product against any document token
    is taken, and those maxima are summed.

    Args:
        query_embeddings: Array of shape (Query_Tokens, Dim) representing query vector sequence.
        doc_embeddings: Array of shape (Doc_Tokens, Dim) representing document vector sequence.

    Returns:
        The float score representing late-interaction relevance. 0.0 when
        either side has no tokens.

    Raises:
        ValueError: If either input is not 2-dimensional (for example a whole
            batch was passed instead of a single item).
    """
    query_embeddings = np.asarray(query_embeddings)
    doc_embeddings = np.asarray(doc_embeddings)

    if query_embeddings.ndim != 2 or doc_embeddings.ndim != 2:
        raise ValueError(
            "compute_maxsim expects 2-D arrays of shape (Tokens, Dim); got shapes "
            f"{query_embeddings.shape} and {doc_embeddings.shape}"
        )

    # np.max cannot reduce over an empty axis, and no tokens means no similarity.
    if query_embeddings.shape[0] == 0 or doc_embeddings.shape[0] == 0:
        return 0.0

    # Similarity matrix of shape (Query_Tokens, Doc_Tokens).
    scores = query_embeddings @ doc_embeddings.T

    # Best-matching document token for each query token, summed over the query.
    return float(np.sum(np.max(scores, axis=1)))
25
+
26
def get_punctuation_token_ids(
    vocab: Dict[str, int],
    query_marker: str = "[Q]",
    doc_marker: str = "[D]"
) -> Set[int]:
    """
    Identifies tokenizer vocabulary IDs that correspond to punctuation marks.
    This is used to construct a skiplist for document token masking.

    A token counts as punctuation when, after stripping subword markers, what
    is left is non-empty and made only of ``string.punctuation`` characters.
    Control tokens and the query/document markers are never included.

    Args:
        vocab: Dictionary mapping token strings to their integer IDs.
        query_marker: Token representing query interaction.
        doc_marker: Token representing document interaction.

    Returns:
        A set of token IDs to be masked/skipped.
    """
    punctuation_chars = set(string.punctuation)

    # Subword markers stripped before the punctuation test. Written as escapes
    # so the file's encoding cannot corrupt them:
    #   "##"      WordPiece continuation prefix
    #   "\u0120"  byte-level BPE leading-space marker (U+0120)
    #   "\u2581"  SentencePiece word-boundary marker (U+2581)
    #   " "       literal space
    #   "</w>"    BPE end-of-word suffix
    clean_markers = ("##", "\u0120", "\u2581", " ", "</w>")

    # Explicitly protect standard control tokens and query/doc markers
    protected_tokens = {
        query_marker,
        doc_marker,
        "[CLS]", "[SEP]", "[PAD]", "[MASK]", "[UNK]",
        "<s>", "</s>", "<pad>", "<mask>", "<unk>"
    }

    skiplist_ids = set()
    for token, token_id in vocab.items():
        if token in protected_tokens:
            continue

        # Any other multi-character token wrapped in [] or <> is treated as a
        # special/control token, even though brackets are punctuation.
        if len(token) > 1 and (
            (token.startswith("[") and token.endswith("]")) or
            (token.startswith("<") and token.endswith(">"))
        ):
            continue

        cleaned = token
        for marker in clean_markers:
            cleaned = cleaned.replace(marker, "")

        # Marker-only tokens clean down to "" and are deliberately not skipped.
        if cleaned and all(char in punctuation_chars for char in cleaned):
            skiplist_ids.add(token_id)

    return skiplist_ids
@@ -0,0 +1,94 @@
1
+ Metadata-Version: 2.4
2
+ Name: intextus-embed
3
+ Version: 0.1.0
4
+ Summary: A lightweight, zero-PyTorch ONNX encoder for generic ColBERT models.
5
+ License: MIT
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: onnxruntime>=1.16.0
13
+ Requires-Dist: tokenizers>=0.19.0
14
+ Requires-Dist: numpy>=1.22.0
15
+ Dynamic: license-file
16
+
17
+ # 🕸️ intextus
18
+
19
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
20
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
21
+
22
+ **intextus** (Latin for *"woven into the text"*) is an ultra-lightweight, 100% PyTorch-free, and production-grade Python library designed to encode late-interaction ColBERT multi-vectors.
23
+
24
+ By replacing massive deep learning libraries with highly optimized, compiled C++/Rust backends, **intextus** delivers full ColBERT MaxSim embeddings in **under 65MB of RAM** with **zero PyTorch or Transformers dependencies**. It is optimized for edge devices, serverless functions (AWS Lambda, Cloudflare Workers), and resource-constrained environments.
25
+
26
+ ---
27
+
28
+ ## ⚑ Key Features
29
+
30
+ - **No PyTorch or Transformers:** Fully decoupled from the heavy standard library pipeline. A simple `pip install` completes in seconds.
31
+ - **Micro Memory Footprint:** Executes multi-vector graphs inside ONNX Runtime, drawing less than 65MB of RAM during inference.
32
+ - **Fast Rust Tokenization:** Uses Hugging Face's raw Rust tokenization backend directly.
33
+ - **Dynamic Punctuation Skiplist:** Dynamically parses `tokenizer.json` at initialization, creating a zero-overhead mask to discard punctuation vectors, matching ColBERT index-saving behaviors.
34
+ - **Standardized Late Interaction:** Exposes native NumPy-based MaxSim calculations.
35
+
36
+ ---
37
+
38
+ ## 📦 Installation
39
+
40
+ Install the library directly via pip:
41
+
42
+ ```bash
43
+ pip install intextus-embed
44
+ ```
45
+
46
+ > [!NOTE]
47
+ > `intextus` currently defaults to highly optimized CPU inference. Full hardware acceleration and GPU execution support are planned for a future release.
48
+
49
+
50
+ ---
51
+
52
+ ## 🚀 Quick Start
53
+
54
+ Here is how to load a model, extract multi-vector embeddings, and compute late-interaction cross-similarity scores entirely in NumPy:
55
+
56
+ ```python
57
+ from intextus import IntextusEncoder, compute_maxsim
58
+
59
+ # Initialize the encoder (defaults to intextus/mxbai-edge-colbert-v0-17m-onnx)
60
+ model = IntextusEncoder()
61
+
62
+ # Or initialize from a local directory containing 'model.onnx' and 'tokenizer.json'
63
+ # model = IntextusEncoder("./my_model_directory")
64
+
65
+ # Extract query and document embeddings (Batch_Size, Sequence_Length, Dimension)
66
+ query_embeddings = model.encode_queries("What is ultra-low latency?")
67
+ doc_embeddings = model.encode_docs("ONNX runtime bypasses the PyTorch layer completely.")
68
+
69
+ # Compute the cross-similarity score via NumPy (using the first item in the batch)
70
+ score = compute_maxsim(query_embeddings[0], doc_embeddings[0])
71
+ print(f"Relevance Score (MaxSim): {score:.4f}")
72
+ ```
73
+
74
+ ---
75
+
76
+ ## 🎯 Supported & Tested Models
77
+
78
+ `intextus` is designed for ultra-fast, edge-compatible ColBERT execution. The primary officially supported and fully validated models are:
79
+
80
+ - **`intextus/mxbai-edge-colbert-v0-17m-onnx`** (Alias: `mxbai-edge-colbert-v0-17m`) — A highly-optimized, single-file ONNX representation of ModernBERT-backed `mxbai-edge-colbert-v0-17m` (66 MB, 48-dimensional late-interaction embeddings). **(Default Model)**
81
+ - **`intextus/mxbai-edge-colbert-v0-32m-onnx`** (Alias: `mxbai-edge-colbert-v0-32m`) — A larger, higher-capacity ONNX representation of ModernBERT-backed `mxbai-edge-colbert-v0-32m` (124 MB, 64-dimensional late-interaction embeddings).
82
+ - **`intextus/lateon-onnx`** (Alias: `lateon`) — A high-capacity base ModernBERT-backed model (580 MB, 128-dimensional late-interaction embeddings). Note: LateOn is case-sensitive, so load it with `IntextusEncoder("lateon", do_lower_case=False)`.
83
+
84
+ > [!NOTE]
85
+ > Any ColBERT model exported via standard Hugging Face/PyLate workflows can be loaded locally by providing the path to its `model.onnx` and `tokenizer.json`.
86
+
87
+ ---
88
+
89
+
90
+ ---
91
+
92
+ ## ⚖️ License
93
+
94
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,10 @@
1
+ intextus/__init__.py,sha256=t6j6n8TW5quU-BH7f05M7ITHv6v4GCUgQ-KtAOnmX2M,120
2
+ intextus/encoder.py,sha256=PuiCw6egJAUTqW5PPrHBpibP189axweRIxdAwe1EFlU,9495
3
+ intextus/export.py,sha256=K8LMl_VnrDXZp8O-xmGx03tdw0auapsnR4t-Ypdwfzo,3942
4
+ intextus/utils.py,sha256=KxpN4KHHREshel0ll5ZA_wf52HQkUFoY-6Jy2uNrgHo,2865
5
+ intextus_embed-0.1.0.dist-info/licenses/LICENSE,sha256=UGbRVzCpgoCCzeyERJ5mvwzBx6fyBv7bPV3foxPQTCM,1073
6
+ intextus_embed-0.1.0.dist-info/METADATA,sha256=dxYQFei3s7AuX3xnXFQ07xIcO7yr3Tph6qbV6ZI1IUY,4341
7
+ intextus_embed-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
+ intextus_embed-0.1.0.dist-info/entry_points.txt,sha256=E9BpCOeNsO_B6OVvBDko4NuhN47SgsomP0_6psNBh7Y,57
9
+ intextus_embed-0.1.0.dist-info/top_level.txt,sha256=XzKpIJuni5qhoZ_J-BHkV7FeUhkQrdOEuk9EGBFwdMs,9
10
+ intextus_embed-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ intextus-export = intextus.export:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 intextus Authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ intextus