poma 0.2.3__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {poma-0.2.3/poma.egg-info → poma-0.3.2}/PKG-INFO +28 -20
- {poma-0.2.3 → poma-0.3.2}/README.md +9 -5
- {poma-0.2.3 → poma-0.3.2}/poma/client.py +79 -47
- poma-0.3.2/poma/integrations/__init__.py +0 -0
- poma-0.3.2/poma/integrations/langchain/__init__.py +12 -0
- {poma-0.2.3/poma/integrations → poma-0.3.2/poma/integrations/llamaindex}/__init__.py +0 -9
- poma-0.3.2/poma/integrations/qdrant/__init__.py +23 -0
- poma-0.3.2/poma/integrations/qdrant/qdrant_poma.py +1326 -0
- {poma-0.2.3 → poma-0.3.2/poma.egg-info}/PKG-INFO +28 -20
- {poma-0.2.3 → poma-0.3.2}/poma.egg-info/SOURCES.txt +7 -2
- {poma-0.2.3 → poma-0.3.2}/poma.egg-info/requires.txt +8 -2
- poma-0.3.2/poma_openai_dpa.poma +0 -0
- {poma-0.2.3 → poma-0.3.2}/pyproject.toml +13 -5
- {poma-0.2.3 → poma-0.3.2}/.github/workflows/python-publish.yml +0 -0
- {poma-0.2.3 → poma-0.3.2}/.gitignore +0 -0
- {poma-0.2.3 → poma-0.3.2}/LICENSE +0 -0
- {poma-0.2.3 → poma-0.3.2}/poma/__init__.py +0 -0
- {poma-0.2.3 → poma-0.3.2}/poma/exceptions.py +0 -0
- {poma-0.2.3/poma/integrations → poma-0.3.2/poma/integrations/langchain}/langchain_poma.py +0 -0
- {poma-0.2.3/poma/integrations → poma-0.3.2/poma/integrations/llamaindex}/llamaindex_poma.py +0 -0
- {poma-0.2.3 → poma-0.3.2}/poma/retrieval.py +0 -0
- {poma-0.2.3 → poma-0.3.2}/poma.egg-info/dependency_links.txt +0 -0
- {poma-0.2.3 → poma-0.3.2}/poma.egg-info/top_level.txt +0 -0
- {poma-0.2.3 → poma-0.3.2}/setup.cfg +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: poma
-Version: 0.2.3
+Version: 0.3.2
 Summary: Official Python SDK for the Poma document-processing API
 Author-email: "POMA AI GmbH, Berlin" <sdk@poma-ai.com>
 License-Expression: MPL-2.0
@@ -10,20 +10,24 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: httpx==0.28.1
 Requires-Dist: pydantic==2.11.7
-Provides-Extra:
-Requires-Dist: langchain==0.3.27; extra == "
-Requires-Dist: langchain-text-splitters==0.3.9; extra == "
-
-
-
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: langchain
+Requires-Dist: langchain==0.3.27; extra == "langchain"
+Requires-Dist: langchain-text-splitters==0.3.9; extra == "langchain"
+Provides-Extra: llamaindex
+Requires-Dist: llama-index==0.13.0; extra == "llamaindex"
+Provides-Extra: qdrant
+Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "qdrant"
+Provides-Extra: all
+Requires-Dist: langchain==0.3.27; extra == "all"
+Requires-Dist: langchain-text-splitters==0.3.9; extra == "all"
+Requires-Dist: llama-index==0.13.0; extra == "all"
+Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "all"
+Requires-Dist: faiss-cpu==1.10.0; extra == "all"
+Requires-Dist: langchain_openai==0.3.28; extra == "all"
+Requires-Dist: langchain_community==0.3.27; extra == "all"
+Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "all"
+Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "all"
+Requires-Dist: dotenv; extra == "all"
 Dynamic: license-file
 
 
@@ -38,11 +42,14 @@ Requires Python 3.10+. Install the core packages:
 pip install poma
 ```
 
-For integrations
+For different integrations:
 ```bash
-pip install 'poma[
-
-pip install 'poma[
+pip install 'poma[langchain]'
+pip install 'poma[llamaindex]'
+pip install 'poma[qdrant]'
+
+# Or LangChain/LlamaIndex/Qdrant including example extras:
+pip install 'poma[all]'
 ```
 
 
@@ -53,10 +60,11 @@ pip install 'poma[integration-examples]'
 
 ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
 
-We provide
+We provide example implementations to help you get started with POMA AI:
 - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
 - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
 - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+-
 
 *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*
 
README.md

@@ -10,11 +10,14 @@ Requires Python 3.10+. Install the core packages:
 pip install poma
 ```
 
-For integrations
+For different integrations:
 ```bash
-pip install 'poma[
-
-pip install 'poma[
+pip install 'poma[langchain]'
+pip install 'poma[llamaindex]'
+pip install 'poma[qdrant]'
+
+# Or LangChain/LlamaIndex/Qdrant including example extras:
+pip install 'poma[all]'
 ```
 
 
@@ -25,10 +28,11 @@ pip install 'poma[integration-examples]'
 
 ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
 
-We provide
+We provide example implementations to help you get started with POMA AI:
 - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
 - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
 - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+-
 
 *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*
 
poma/client.py

@@ -21,6 +21,38 @@ USER_AGENT = "poma-ai-sdk/0.1.0"
 API_BASE_URL = "https://api.poma-ai.com/api/v1"
 
 
+def extract_chunks_and_chunksets_from_poma_archive(
+    poma_archive_data: bytes | None = None,
+    poma_archive_path: str | os.PathLike[str] | None = None,
+) -> dict[str, Any]:
+    """
+    Extract chunks and chunksets from a POMA archive file.
+    POMA archive is a zip file containing chunks.json and chunksets.json.
+    Args:
+        poma_archive_data: The POMA archive as bytes.
+        poma_archive_path: Path to the POMA archive file.
+    Returns:
+        dict: A dictionary with ``chunks`` and ``chunksets`` keys.
+    """
+    chunks = None
+    chunksets = None
+    if poma_archive_path:
+        with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
+            chunks = zip_ref.read("chunks.json")
+            chunksets = zip_ref.read("chunksets.json")
+    elif poma_archive_data:
+        with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
+            chunks = zip_ref.read("chunks.json")
+            chunksets = zip_ref.read("chunksets.json")
+    else:
+        raise ValueError(
+            "Either poma_archive_data or poma_archive_path must be provided."
+        )
+    if not chunks or not chunksets:
+        raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
+    return {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
+
+
 class Poma:
     """
     Client for interacting with the POMA API.
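This module-level helper unpacks a previously downloaded `.poma` archive without needing a client instance. A minimal usage sketch, assuming a locally saved archive (the path below is hypothetical):

```python
from poma.client import extract_chunks_and_chunksets_from_poma_archive

# Unpack a .poma archive saved earlier (hypothetical path).
result = extract_chunks_and_chunksets_from_poma_archive(
    poma_archive_path="downloads/report.poma"
)
chunks = result["chunks"]        # parsed contents of chunks.json
chunksets = result["chunksets"]  # parsed contents of chunksets.json
print(len(chunks), len(chunksets))
```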
@@ -61,23 +93,25 @@ class Poma:
 
     def start_chunk_file(
         self,
-        file_path: os.PathLike[str],
+        file_path: str | os.PathLike[str],
         *,
         base_url: str | None = None,
     ) -> dict[str, Any]:
         """
         Submit a file with text to POMA for chunking.
         Args:
-            file_path (os.PathLike[str]):
-                Path to the input file. Must have an allowed file extension.
+            file_path (str | os.PathLike[str]):
+                Path to the input file (string or path-like). Must have an allowed file extension.
             base_url (str, optional):
                 Optional base URL to resolve relative links within the file.
         Returns:
             A dictionary containing a unique job identifier for the submitted job.
         """
-        if
-        raise ValueError("file_path must be a non-empty
+        if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
+            raise ValueError("file_path must be a non-empty string or path-like.")
+        path = Path(file_path)
         payload = {}
+        payload["is_sdk"] = True
         if base_url:
             payload["base_url"] = base_url
         try:
@@ -85,7 +119,7 @@ class Poma:
                 f"{self.base_api_url}/ingest",
                 data=payload,
                 files={
-                    "file": (
+                    "file": (path.name, path.read_bytes()),
                 },
             )
             response.raise_for_status()
@@ -93,10 +127,10 @@ class Poma:
             status = error.response.status_code
             if status in (401, 403):
                 raise AuthenticationError(
-                    f"Failed to submit file '{
+                    f"Failed to submit file '{path}': authentication error"
                 ) from error
             raise RemoteServerError(
-                f"Failed to submit file '{
+                f"Failed to submit file '{path}': {status}"
             ) from error
         try:
             data = response.json()
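As of 0.3.2, `start_chunk_file` accepts a plain string as well as a path-like object, validates it, and reports the submitted path in authentication and server errors. A minimal sketch; the client constructor arguments and the file name below are assumptions, not shown in this diff:

```python
from pathlib import Path
from poma.client import Poma

client = Poma(api_key="...")  # constructor arguments assumed; not shown in this diff

# Both call styles work in 0.3.2 (the old annotation allowed only os.PathLike[str]).
job = client.start_chunk_file("contracts/dpa.pdf")            # hypothetical file, plain string
job = client.start_chunk_file(Path("contracts") / "dpa.pdf")  # equivalent, path-like
print(job)  # dictionary containing a unique job identifier
```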
@@ -115,6 +149,7 @@ class Poma:
         max_interval: float = 15.0,
         show_progress: bool = False,
         download_dir: str | os.PathLike[str] | None = None,
+        filename: str | None = None,
     ) -> dict[str, Any]:
         """
         Poll POMA for the result of a chunking job until completion.
@@ -130,9 +165,14 @@ class Poma:
             show_progress (bool, default=False):
                 If True, logs progress messages during polling.
             download_dir (str | os.PathLike[str], optional):
-                Directory to save the downloaded file in.
-
-
+                Directory to save the downloaded file in. If neither download_dir nor
+                filename is set, the result is returned in memory (no file saved). If
+                filename is set but download_dir is not, the file is saved in the
+                current directory.
+            filename (str, optional):
+                Name for the saved .poma file. If it does not end with ``.poma``, that
+                suffix is appended. If not set when saving to disk, uses the server
+                filename when provided, otherwise ``{job_id}.poma``.
         Returns:
             The JSON result containing at least the keys `chunks` and `chunksets`.
 
@@ -156,17 +196,26 @@ class Poma:
                 "Failed to receive download URL from server."
             )
 
-        if download_dir is None:
+        if download_dir is None and filename is None:
             # Return bytes content instead of saving to file
             file_bytes = self.download_bytes(download_url)
             return self.extract_chunks_and_chunksets_from_poma_archive(
                 poma_archive_data=file_bytes
            )
         else:
-            # Save downloaded file to
-
+            # Save downloaded file (to download_dir or current dir if only filename set)
+            save_filename = (
+                filename or download.get("filename") or f"{job_id}.poma"
+            )
+            if not save_filename.endswith(".poma"):
+                save_filename = f"{save_filename}.poma"
+            save_dir = (
+                download_dir
+                if download_dir not in (None, "")
+                else "."
+            )
             downloaded_file_path = self.download_file(
-                download_url,
+                download_url, save_filename, save_directory=save_dir
             )
             return self.extract_chunks_and_chunksets_from_poma_archive(
                 poma_archive_path=downloaded_file_path
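Together with the new `filename` parameter, the polling method now supports three result modes: in memory, saved under a server- or job-derived name, or saved under an explicit name. A sketch of the call patterns; `wait_for_result` is a hypothetical stand-in because the method's name is not visible in this diff, and `client`/`job_id` are assumed from the submission step:

```python
# 1. Neither download_dir nor filename: archive is unpacked in memory, nothing is saved.
result = client.wait_for_result(job_id, show_progress=True)

# 2. filename only: saved as ./report.poma (".poma" is appended if missing).
result = client.wait_for_result(job_id, filename="report")

# 3. download_dir and filename: saved as results/report.poma.
result = client.wait_for_result(job_id, download_dir="results", filename="report.poma")

# In every mode the returned dict contains at least "chunks" and "chunksets".
chunks, chunksets = result["chunks"], result["chunksets"]
```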
@@ -208,37 +257,18 @@ class Poma:
         poma_archive_path: str | os.PathLike[str] | None = None,
     ) -> dict[str, Any]:
         """
-        Extract POMA archive
-        POMA archive
+        Extract chunks and chunksets from a POMA archive; delegates to module-level function.
+        POMA archive is a zip file containing chunks.json and chunksets.json.
         Args:
-
+            poma_archive_data: The POMA archive as bytes.
+            poma_archive_path: Path to the POMA archive file.
         Returns:
-            dict: A dictionary
+            dict: A dictionary with ``chunks`` and ``chunksets`` keys.
         """
-
-
-
-
-        if poma_archive_path:
-            with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
-                chunks = zip_ref.read("chunks.json")
-                chunksets = zip_ref.read("chunksets.json")
-        elif poma_archive_data:
-            with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
-                chunks = zip_ref.read("chunks.json")
-                chunksets = zip_ref.read("chunksets.json")
-        else:
-            raise ValueError(
-                "Either poma_archive_data or poma_archive_path must be provided."
-            )
-
-        # Sanity check
-        if not chunks or not chunksets:
-            raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
-
-        # Load the chunks and chunksets
-        json_result = {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
-        return json_result
+        return extract_chunks_and_chunksets_from_poma_archive(
+            poma_archive_data=poma_archive_data,
+            poma_archive_path=poma_archive_path,
+        )
 
     def create_cheatsheet(
         self,
@@ -312,14 +342,16 @@ class Poma:
         if not filename:
             filename = Path(download_url).name or "downloaded_file"
 
-        # Determine save directory
+        # Determine save directory (default current directory so path has a parent)
         if save_directory:
             save_path = Path(save_directory) / filename
         else:
-            save_path = Path(filename
+            save_path = Path(".") / filename
 
-        # Create the directory if it doesn't exist
-        os.
+        # Create the directory if it doesn't exist (skip when file is in cwd)
+        parent = os.path.dirname(save_path)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
 
         # Download the file data
         content = self.download_bytes(download_url)
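The lower-level download helpers can also be called directly when only the raw archive is wanted. A sketch, assuming `client` and a `download_url` obtained from a finished job, and assuming these helpers are part of the public client surface:

```python
# Raw archive bytes, nothing written to disk.
data = client.download_bytes(download_url)

# Save the archive as downloads/report.poma; the directory is created if missing.
# With save_directory omitted, the file lands in the current working directory.
path = client.download_file(download_url, "report.poma", save_directory="downloads")
```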
poma/integrations/llamaindex/__init__.py (moved from poma/integrations/__init__.py)

@@ -1,9 +1,3 @@
-from .langchain_poma import (
-    PomaFileLoader,
-    PomaChunksetSplitter,
-    PomaCheatsheetRetrieverLC,
-)
-
 from .llamaindex_poma import (
     PomaFileReader,
     PomaChunksetNodeParser,

@@ -11,9 +5,6 @@ from .llamaindex_poma import (
 )
 
 __all__ = [
-    "PomaFileLoader",
-    "PomaChunksetSplitter",
-    "PomaCheatsheetRetrieverLC",
     "PomaFileReader",
     "PomaChunksetNodeParser",
     "PomaCheatsheetRetrieverLI",
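Since the integrations package is split into per-framework subpackages in 0.3.2, import paths change. A sketch of the new paths; the LlamaIndex names are confirmed by the `__init__` above, while the LangChain subpackage is assumed to re-export the same three classes the old combined `__init__` did:

```python
# LlamaIndex integration (exports confirmed by the diff above).
from poma.integrations.llamaindex import (
    PomaFileReader,
    PomaChunksetNodeParser,
    PomaCheatsheetRetrieverLI,
)

# LangChain integration (assumed: poma/integrations/langchain/__init__.py is new in
# 0.3.2 but its contents are not shown in this diff; langchain_poma.py moved there).
from poma.integrations.langchain import (
    PomaFileLoader,
    PomaChunksetSplitter,
    PomaCheatsheetRetrieverLC,
)
```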
poma/integrations/qdrant/__init__.py (new file)

@@ -0,0 +1,23 @@
+from .qdrant_poma import (
+    PomaQdrant,
+    QdrantConfig,
+    VectorConfig,
+    InferenceConfig,
+    QdrantResponseError,
+    SearchResult,
+    chunk_uuid_string,
+    DenseEmbedSync,
+    SparseEmbedSync,
+)
+
+__all__ = [
+    "PomaQdrant",
+    "QdrantConfig",
+    "VectorConfig",
+    "InferenceConfig",
+    "QdrantResponseError",
+    "SearchResult",
+    "chunk_uuid_string",
+    "DenseEmbedSync",
+    "SparseEmbedSync",
+]
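The new Qdrant integration exposes its public surface through this `__init__`. A minimal import sketch; only the exported names are confirmed here, and their constructor arguments live in qdrant_poma.py, which this diff does not show:

```python
# Requires the optional extra: pip install 'poma[qdrant]'
from poma.integrations.qdrant import (
    PomaQdrant,
    QdrantConfig,
    VectorConfig,
    InferenceConfig,
    SearchResult,
)
```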