poma 0.2.3__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: poma
- Version: 0.2.3
+ Version: 0.3.2
  Summary: Official Python SDK for the Poma document-processing API
  Author-email: "POMA AI GmbH, Berlin" <sdk@poma-ai.com>
  License-Expression: MPL-2.0
@@ -10,20 +10,24 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: httpx==0.28.1
  Requires-Dist: pydantic==2.11.7
- Provides-Extra: integrations
- Requires-Dist: langchain==0.3.27; extra == "integrations"
- Requires-Dist: langchain-text-splitters==0.3.9; extra == "integrations"
- Requires-Dist: llama-index==0.13.0; extra == "integrations"
- Provides-Extra: integration-examples
- Requires-Dist: langchain==0.3.27; extra == "integration-examples"
- Requires-Dist: langchain-text-splitters==0.3.9; extra == "integration-examples"
- Requires-Dist: llama-index==0.13.0; extra == "integration-examples"
- Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "integration-examples"
- Requires-Dist: faiss-cpu==1.10.0; extra == "integration-examples"
- Requires-Dist: langchain_openai==0.3.28; extra == "integration-examples"
- Requires-Dist: langchain_community==0.3.27; extra == "integration-examples"
- Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "integration-examples"
- Requires-Dist: dotenv; extra == "integration-examples"
+ Provides-Extra: langchain
+ Requires-Dist: langchain==0.3.27; extra == "langchain"
+ Requires-Dist: langchain-text-splitters==0.3.9; extra == "langchain"
+ Provides-Extra: llamaindex
+ Requires-Dist: llama-index==0.13.0; extra == "llamaindex"
+ Provides-Extra: qdrant
+ Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "qdrant"
+ Provides-Extra: all
+ Requires-Dist: langchain==0.3.27; extra == "all"
+ Requires-Dist: langchain-text-splitters==0.3.9; extra == "all"
+ Requires-Dist: llama-index==0.13.0; extra == "all"
+ Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "all"
+ Requires-Dist: faiss-cpu==1.10.0; extra == "all"
+ Requires-Dist: langchain_openai==0.3.28; extra == "all"
+ Requires-Dist: langchain_community==0.3.27; extra == "all"
+ Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "all"
+ Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "all"
+ Requires-Dist: dotenv; extra == "all"
  Dynamic: license-file

  ![POMA AI Logo](https://raw.githubusercontent.com/poma-ai/.github/main/assets/POMA_AI_Logo_Pink.svg)
@@ -38,11 +42,14 @@ Requires Python 3.10+. Install the core packages:
  pip install poma
  ```

- For integrations into LangChain and LlamaIndex:
+ For different integrations:
  ```bash
- pip install 'poma[integrations]'
- # Or LangChain/LlamaIndex including example extras:
- pip install 'poma[integration-examples]'
+ pip install 'poma[langchain]'
+ pip install 'poma[llamaindex]'
+ pip install 'poma[qdrant]'
+
+ # Or LangChain/LlamaIndex/Qdrant including example extras:
+ pip install 'poma[all]'
  ```


@@ -53,10 +60,11 @@ pip install 'poma[integration-examples]'

  ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)

- We provide four example implementations to help you get started with POMA AI:
+ We provide example implementations to help you get started with POMA AI:
  - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
  - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
  - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+ -

  *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*

@@ -10,11 +10,14 @@ Requires Python 3.10+. Install the core packages:
  pip install poma
  ```

- For integrations into LangChain and LlamaIndex:
+ For different integrations:
  ```bash
- pip install 'poma[integrations]'
- # Or LangChain/LlamaIndex including example extras:
- pip install 'poma[integration-examples]'
+ pip install 'poma[langchain]'
+ pip install 'poma[llamaindex]'
+ pip install 'poma[qdrant]'
+
+ # Or LangChain/LlamaIndex/Qdrant including example extras:
+ pip install 'poma[all]'
  ```


@@ -25,10 +28,11 @@ pip install 'poma[integration-examples]'

  ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)

- We provide four example implementations to help you get started with POMA AI:
+ We provide example implementations to help you get started with POMA AI:
  - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
  - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
  - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
+ -

  *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*

@@ -21,6 +21,38 @@ USER_AGENT = "poma-ai-sdk/0.1.0"
  API_BASE_URL = "https://api.poma-ai.com/api/v1"


+ def extract_chunks_and_chunksets_from_poma_archive(
+     poma_archive_data: bytes | None = None,
+     poma_archive_path: str | os.PathLike[str] | None = None,
+ ) -> dict[str, Any]:
+     """
+     Extract chunks and chunksets from a POMA archive file.
+     POMA archive is a zip file containing chunks.json and chunksets.json.
+     Args:
+         poma_archive_data: The POMA archive as bytes.
+         poma_archive_path: Path to the POMA archive file.
+     Returns:
+         dict: A dictionary with ``chunks`` and ``chunksets`` keys.
+     """
+     chunks = None
+     chunksets = None
+     if poma_archive_path:
+         with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
+             chunks = zip_ref.read("chunks.json")
+             chunksets = zip_ref.read("chunksets.json")
+     elif poma_archive_data:
+         with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
+             chunks = zip_ref.read("chunks.json")
+             chunksets = zip_ref.read("chunksets.json")
+     else:
+         raise ValueError(
+             "Either poma_archive_data or poma_archive_path must be provided."
+         )
+     if not chunks or not chunksets:
+         raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
+     return {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
+
+
  class Poma:
      """
      Client for interacting with the POMA API.
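The helper above is new in 0.3.2 and can be used without a `Poma` client instance, for example to re-open a `.poma` archive that was saved earlier. A minimal usage sketch; the import path is an assumption, since the diff does not show which module the function lives in:

```python
# Hypothetical import path -- the module name is not visible in this diff.
from poma import extract_chunks_and_chunksets_from_poma_archive

# From a .poma file on disk (a path takes precedence over bytes if both are passed)
result = extract_chunks_and_chunksets_from_poma_archive(poma_archive_path="report.poma")
chunks, chunksets = result["chunks"], result["chunksets"]

# Or from bytes already held in memory
with open("report.poma", "rb") as fh:
    result = extract_chunks_and_chunksets_from_poma_archive(poma_archive_data=fh.read())
```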
@@ -61,23 +93,25 @@ class Poma:

      def start_chunk_file(
          self,
-         file_path: os.PathLike[str],
+         file_path: str | os.PathLike[str],
          *,
          base_url: str | None = None,
      ) -> dict[str, Any]:
          """
          Submit a file with text to POMA for chunking.
          Args:
-             file_path (os.PathLike[str]):
-                 Path to the input file. Must have an allowed file extension.
+             file_path (str | os.PathLike[str]):
+                 Path to the input file (string or path-like). Must have an allowed file extension.
              base_url (str, optional):
                  Optional base URL to resolve relative links within the file.
          Returns:
              A dictionary containing a unique job identifier for the submitted job.
          """
-         if not file_path or not isinstance(file_path, os.PathLike):
-             raise ValueError("file_path must be a non-empty os.PathLike.")
+         if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
+             raise ValueError("file_path must be a non-empty string or path-like.")
+         path = Path(file_path)
          payload = {}
+         payload["is_sdk"] = True
          if base_url:
              payload["base_url"] = base_url
          try:
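`start_chunk_file` now accepts plain strings in addition to `os.PathLike` objects, and empty or whitespace-only strings are rejected before anything is uploaded. A hedged usage sketch; the `Poma(...)` constructor arguments and the exact key in the returned dictionary are assumptions, as neither appears in this hunk:

```python
from pathlib import Path
from poma import Poma

client = Poma(api_key="...")  # constructor signature assumed for illustration

# 0.2.3 required an os.PathLike; 0.3.2 accepts either form
job = client.start_chunk_file("docs/report.pdf")
job = client.start_chunk_file(Path("docs/report.pdf"), base_url="https://example.com/docs/")
job_id = job.get("job_id")  # key name assumed; the docstring only promises a job identifier
```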
@@ -85,7 +119,7 @@ class Poma:
                  f"{self.base_api_url}/ingest",
                  data=payload,
                  files={
-                     "file": (Path(file_path).name, Path(file_path).read_bytes()),
+                     "file": (path.name, path.read_bytes()),
                  },
              )
              response.raise_for_status()
@@ -93,10 +127,10 @@ class Poma:
              status = error.response.status_code
              if status in (401, 403):
                  raise AuthenticationError(
-                     f"Failed to submit file '{file_path}': authentication error"
+                     f"Failed to submit file '{path}': authentication error"
                  ) from error
              raise RemoteServerError(
-                 f"Failed to submit file '{file_path}': {status}"
+                 f"Failed to submit file '{path}': {status}"
              ) from error
          try:
              data = response.json()
@@ -115,6 +149,7 @@ class Poma:
          max_interval: float = 15.0,
          show_progress: bool = False,
          download_dir: str | os.PathLike[str] | None = None,
+         filename: str | None = None,
      ) -> dict[str, Any]:
          """
          Poll POMA for the result of a chunking job until completion.
@@ -130,9 +165,14 @@ class Poma:
          show_progress (bool, default=False):
              If True, logs progress messages during polling.
          download_dir (str | os.PathLike[str], optional):
-             Directory to save the downloaded file in. Required if return_bytes=False.
-         return_bytes (bool, default=False):
-             If True, returns the file content as bytes instead of saving to disk.
+             Directory to save the downloaded file in. If neither download_dir nor
+             filename is set, the result is returned in memory (no file saved). If
+             filename is set but download_dir is not, the file is saved in the
+             current directory.
+         filename (str, optional):
+             Name for the saved .poma file. If it does not end with ``.poma``, that
+             suffix is appended. If not set when saving to disk, uses the server
+             filename when provided, otherwise ``{job_id}.poma``.
      Returns:
          The JSON result containing at least the keys `chunks` and `chunksets`.

@@ -156,17 +196,26 @@ class Poma:
                  "Failed to receive download URL from server."
              )

-         if download_dir is None:
+         if download_dir is None and filename is None:
              # Return bytes content instead of saving to file
              file_bytes = self.download_bytes(download_url)
              return self.extract_chunks_and_chunksets_from_poma_archive(
                  poma_archive_data=file_bytes
              )
          else:
-             # Save downloaded file to directory
-             filename = download.get("filename", "downloaded_file.poma")
+             # Save downloaded file (to download_dir or current dir if only filename set)
+             save_filename = (
+                 filename or download.get("filename") or f"{job_id}.poma"
+             )
+             if not save_filename.endswith(".poma"):
+                 save_filename = f"{save_filename}.poma"
+             save_dir = (
+                 download_dir
+                 if download_dir not in (None, "")
+                 else "."
+             )
              downloaded_file_path = self.download_file(
-                 download_url, filename, save_directory=download_dir
+                 download_url, save_filename, save_directory=save_dir
              )
              return self.extract_chunks_and_chunksets_from_poma_archive(
                  poma_archive_path=downloaded_file_path
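Taken together with the docstring changes above, the save logic now works like this: with neither `download_dir` nor `filename`, the archive stays in memory; with only `filename`, the file lands in the current directory; with `download_dir`, the server-supplied name or `{job_id}.poma` is used, and `.poma` is appended to any custom name that lacks it. A hedged sketch; the polling method's name and its first positional argument are not visible in these hunks, so `wait_for_chunks(job_id, ...)` is only a placeholder:

```python
client = Poma(api_key="...")  # constructor arguments assumed

# 1) In-memory result: nothing is written to disk
result = client.wait_for_chunks(job_id)

# 2) Saved as ./my_doc.poma (".poma" is appended automatically)
result = client.wait_for_chunks(job_id, filename="my_doc")

# 3) Saved into ./archives/ under the server filename or "<job_id>.poma"
result = client.wait_for_chunks(job_id, download_dir="archives")

chunks, chunksets = result["chunks"], result["chunksets"]
```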
@@ -208,37 +257,18 @@ class Poma:
          poma_archive_path: str | os.PathLike[str] | None = None,
      ) -> dict[str, Any]:
          """
-         Extract POMA archive file.
-         POMA archive file is a zip file containing the chunks.json and chunksets.json files.
+         Extract chunks and chunksets from a POMA archive; delegates to module-level function.
+         POMA archive is a zip file containing chunks.json and chunksets.json.
          Args:
-             poma_archive (bytes): The POMA archive file.
+             poma_archive_data: The POMA archive as bytes.
+             poma_archive_path: Path to the POMA archive file.
          Returns:
-             dict: A dictionary containing the chunks and chunksets.
+             dict: A dictionary with ``chunks`` and ``chunksets`` keys.
          """
-
-         # Load the chunks and chunksets from POMA archive
-         chunks = None
-         chunksets = None
-         if poma_archive_path:
-             with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
-                 chunks = zip_ref.read("chunks.json")
-                 chunksets = zip_ref.read("chunksets.json")
-         elif poma_archive_data:
-             with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
-                 chunks = zip_ref.read("chunks.json")
-                 chunksets = zip_ref.read("chunksets.json")
-         else:
-             raise ValueError(
-                 "Either poma_archive_data or poma_archive_path must be provided."
-             )
-
-         # Sanity check
-         if not chunks or not chunksets:
-             raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
-
-         # Load the chunks and chunksets
-         json_result = {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
-         return json_result
+         return extract_chunks_and_chunksets_from_poma_archive(
+             poma_archive_data=poma_archive_data,
+             poma_archive_path=poma_archive_path,
+         )

      def create_cheatsheet(
          self,
@@ -312,14 +342,16 @@ class Poma:
          if not filename:
              filename = Path(download_url).name or "downloaded_file"

-         # Determine save directory
+         # Determine save directory (default current directory so path has a parent)
          if save_directory:
              save_path = Path(save_directory) / filename
          else:
-             save_path = Path(filename)
+             save_path = Path(".") / filename

-         # Create the directory if it doesn't exist
-         os.makedirs(os.path.dirname(save_path), exist_ok=True)
+         # Create the directory if it doesn't exist (skip when file is in cwd)
+         parent = os.path.dirname(save_path)
+         if parent:
+             os.makedirs(parent, exist_ok=True)

          # Download the file data
          content = self.download_bytes(download_url)
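The `if parent:` guard matters because `os.path.dirname()` returns an empty string for a bare filename, and `os.makedirs("")` raises `FileNotFoundError`; that is exactly the case 0.2.3 hit when no `save_directory` was given. A small standalone illustration of the behaviour the new code avoids:

```python
import os
from pathlib import Path

# 0.2.3: a bare filename has no directory component, so saving to the current
# directory effectively called os.makedirs("") and raised FileNotFoundError.
# 0.3.2: only create a directory when there actually is a parent component.
save_path = Path(".") / "result.poma"
parent = os.path.dirname(save_path)
if parent:  # empty string when the file goes to the current directory -> skip makedirs
    os.makedirs(parent, exist_ok=True)
save_path.write_bytes(b"...")  # placeholder for the downloaded content
```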
@@ -0,0 +1,12 @@
+ from .langchain_poma import (
+     PomaFileLoader,
+     PomaChunksetSplitter,
+     PomaCheatsheetRetrieverLC,
+ )
+
+
+ __all__ = [
+     "PomaFileLoader",
+     "PomaChunksetSplitter",
+     "PomaCheatsheetRetrieverLC",
+ ]
@@ -1,9 +1,3 @@
- from .langchain_poma import (
-     PomaFileLoader,
-     PomaChunksetSplitter,
-     PomaCheatsheetRetrieverLC,
- )
-
  from .llamaindex_poma import (
      PomaFileReader,
      PomaChunksetNodeParser,
@@ -11,9 +5,6 @@ from .llamaindex_poma import (
  )

  __all__ = [
-     "PomaFileLoader",
-     "PomaChunksetSplitter",
-     "PomaCheatsheetRetrieverLC",
      "PomaFileReader",
      "PomaChunksetNodeParser",
      "PomaCheatsheetRetrieverLI",
@@ -0,0 +1,23 @@
+ from .qdrant_poma import (
+     PomaQdrant,
+     QdrantConfig,
+     VectorConfig,
+     InferenceConfig,
+     QdrantResponseError,
+     SearchResult,
+     chunk_uuid_string,
+     DenseEmbedSync,
+     SparseEmbedSync,
+ )
+
+ __all__ = [
+     "PomaQdrant",
+     "QdrantConfig",
+     "VectorConfig",
+     "InferenceConfig",
+     "QdrantResponseError",
+     "SearchResult",
+     "chunk_uuid_string",
+     "DenseEmbedSync",
+     "SparseEmbedSync",
+ ]
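With 0.3.2 the LangChain, LlamaIndex, and Qdrant integrations each get their own `__init__.py`, so a project only needs the extra it actually uses. The directory names are not visible in this diff; the sketch below assumes a `poma.integrations.<name>` layout purely for illustration:

```python
# Hypothetical import paths -- the diff shows the __init__.py contents, not the package directories.
from poma.integrations.langchain import PomaFileLoader, PomaChunksetSplitter, PomaCheatsheetRetrieverLC
from poma.integrations.llamaindex import PomaFileReader, PomaChunksetNodeParser, PomaCheatsheetRetrieverLI
from poma.integrations.qdrant import PomaQdrant, QdrantConfig, VectorConfig

# Each subpackage requires the matching extra, e.g.:
#   pip install 'poma[langchain]'    pip install 'poma[llamaindex]'    pip install 'poma[qdrant]'
```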