poma 0.2.2__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: poma
3
- Version: 0.2.2
3
+ Version: 0.3.2
4
4
  Summary: Official Python SDK for the Poma document-processing API
5
5
  Author-email: "POMA AI GmbH, Berlin" <sdk@poma-ai.com>
6
6
  License-Expression: MPL-2.0
@@ -10,20 +10,24 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: httpx==0.28.1
12
12
  Requires-Dist: pydantic==2.11.7
13
- Provides-Extra: integrations
14
- Requires-Dist: langchain==0.3.27; extra == "integrations"
15
- Requires-Dist: langchain-text-splitters==0.3.9; extra == "integrations"
16
- Requires-Dist: llama-index==0.13.0; extra == "integrations"
17
- Provides-Extra: integration-examples
18
- Requires-Dist: langchain==0.3.27; extra == "integration-examples"
19
- Requires-Dist: langchain-text-splitters==0.3.9; extra == "integration-examples"
20
- Requires-Dist: llama-index==0.13.0; extra == "integration-examples"
21
- Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "integration-examples"
22
- Requires-Dist: faiss-cpu==1.10.0; extra == "integration-examples"
23
- Requires-Dist: langchain_openai==0.3.28; extra == "integration-examples"
24
- Requires-Dist: langchain_community==0.3.27; extra == "integration-examples"
25
- Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "integration-examples"
26
- Requires-Dist: dotenv; extra == "integration-examples"
13
+ Provides-Extra: langchain
14
+ Requires-Dist: langchain==0.3.27; extra == "langchain"
15
+ Requires-Dist: langchain-text-splitters==0.3.9; extra == "langchain"
16
+ Provides-Extra: llamaindex
17
+ Requires-Dist: llama-index==0.13.0; extra == "llamaindex"
18
+ Provides-Extra: qdrant
19
+ Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "qdrant"
20
+ Provides-Extra: all
21
+ Requires-Dist: langchain==0.3.27; extra == "all"
22
+ Requires-Dist: langchain-text-splitters==0.3.9; extra == "all"
23
+ Requires-Dist: llama-index==0.13.0; extra == "all"
24
+ Requires-Dist: llama-index-vector-stores-faiss==0.5.0; extra == "all"
25
+ Requires-Dist: faiss-cpu==1.10.0; extra == "all"
26
+ Requires-Dist: langchain_openai==0.3.28; extra == "all"
27
+ Requires-Dist: langchain_community==0.3.27; extra == "all"
28
+ Requires-Dist: llama-index-embeddings-langchain==0.4.0; extra == "all"
29
+ Requires-Dist: qdrant-client[fastembed]==1.16.2; extra == "all"
30
+ Requires-Dist: dotenv; extra == "all"
27
31
  Dynamic: license-file
28
32
 
29
33
  ![POMA AI Logo](https://raw.githubusercontent.com/poma-ai/.github/main/assets/POMA_AI_Logo_Pink.svg)
@@ -38,11 +42,14 @@ Requires Python 3.10+. Install the core packages:
38
42
  pip install poma
39
43
  ```
40
44
 
41
- For integrations into LangChain and LlamaIndex:
45
+ For different integrations:
42
46
  ```bash
43
- pip install 'poma[integrations]'
44
- # Or LangChain/LlamaIndex including example extras:
45
- pip install 'poma[integration-examples]'
47
+ pip install 'poma[langchain]'
48
+ pip install 'poma[llamaindex]'
49
+ pip install 'poma[qdrant]'
50
+
51
+ # Or LangChain/LlamaIndex/Qdrant including example extras:
52
+ pip install 'poma[all]'
46
53
  ```
47
54
 
48
55
 
@@ -53,10 +60,11 @@ pip install 'poma[integration-examples]'
53
60
 
54
61
  ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
55
62
 
56
- We provide four example implementations to help you get started with POMA AI:
63
+ We provide example implementations to help you get started with POMA AI:
57
64
  - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
58
65
  - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
59
66
  - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
67
+ -
60
68
 
61
69
  *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*
62
70
 
@@ -10,11 +10,14 @@ Requires Python 3.10+. Install the core packages:
10
10
  pip install poma
11
11
  ```
12
12
 
13
- For integrations into LangChain and LlamaIndex:
13
+ For different integrations:
14
14
  ```bash
15
- pip install 'poma[integrations]'
16
- # Or LangChain/LlamaIndex including example extras:
17
- pip install 'poma[integration-examples]'
15
+ pip install 'poma[langchain]'
16
+ pip install 'poma[llamaindex]'
17
+ pip install 'poma[qdrant]'
18
+
19
+ # Or LangChain/LlamaIndex/Qdrant including example extras:
20
+ pip install 'poma[all]'
18
21
  ```
19
22
 
20
23
 
@@ -25,10 +28,11 @@ pip install 'poma[integration-examples]'
25
28
 
26
29
  ### Example Implementations — all examples, integrations, and additional information can be found in our GitHub repository: [poma-ai/poma](https://github.com/poma-ai/)
27
30
 
28
- We provide four example implementations to help you get started with POMA AI:
31
+ We provide example implementations to help you get started with POMA AI:
29
32
  - example.py — A standalone implementation for documents, showing the basic POMA AI workflow with simple keyword-based retrieval
30
33
  - example_langchain.py — Integration with LangChain, demonstrating how easy it is to use POMA AI with LangChain
31
34
  - example_llamaindex.py — Integration with LlamaIndex, showing how simple it is to use POMA AI with LlamaIndex
35
+ -
32
36
 
33
37
  *Note: The integration examples use OpenAI embeddings. Make sure to set your OPENAI_API_KEY environment variable, or replace the embeddings with your preferred ones.*
34
38
 
@@ -21,6 +21,38 @@ USER_AGENT = "poma-ai-sdk/0.1.0"
21
21
  API_BASE_URL = "https://api.poma-ai.com/api/v1"
22
22
 
23
23
 
24
+ def extract_chunks_and_chunksets_from_poma_archive(
25
+ poma_archive_data: bytes | None = None,
26
+ poma_archive_path: str | os.PathLike[str] | None = None,
27
+ ) -> dict[str, Any]:
28
+ """
29
+ Extract chunks and chunksets from a POMA archive file.
30
+ POMA archive is a zip file containing chunks.json and chunksets.json.
31
+ Args:
32
+ poma_archive_data: The POMA archive as bytes.
33
+ poma_archive_path: Path to the POMA archive file.
34
+ Returns:
35
+ dict: A dictionary with ``chunks`` and ``chunksets`` keys.
36
+ """
37
+ chunks = None
38
+ chunksets = None
39
+ if poma_archive_path:
40
+ with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
41
+ chunks = zip_ref.read("chunks.json")
42
+ chunksets = zip_ref.read("chunksets.json")
43
+ elif poma_archive_data:
44
+ with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
45
+ chunks = zip_ref.read("chunks.json")
46
+ chunksets = zip_ref.read("chunksets.json")
47
+ else:
48
+ raise ValueError(
49
+ "Either poma_archive_data or poma_archive_path must be provided."
50
+ )
51
+ if not chunks or not chunksets:
52
+ raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
53
+ return {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
54
+
55
+
24
56
  class Poma:
25
57
  """
26
58
  Client for interacting with the POMA API.
@@ -61,23 +93,25 @@ class Poma:
61
93
 
62
94
  def start_chunk_file(
63
95
  self,
64
- file_path: os.PathLike[str],
96
+ file_path: str | os.PathLike[str],
65
97
  *,
66
98
  base_url: str | None = None,
67
99
  ) -> dict[str, Any]:
68
100
  """
69
101
  Submit a file with text to POMA for chunking.
70
102
  Args:
71
- file_path (os.PathLike[str]):
72
- Path to the input file. Must have an allowed file extension.
103
+ file_path (str | os.PathLike[str]):
104
+ Path to the input file (string or path-like). Must have an allowed file extension.
73
105
  base_url (str, optional):
74
106
  Optional base URL to resolve relative links within the file.
75
107
  Returns:
76
108
  A dictionary containing a unique job identifier for the submitted job.
77
109
  """
78
- if not file_path or not isinstance(file_path, os.PathLike):
79
- raise ValueError("file_path must be a non-empty os.PathLike.")
110
+ if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
111
+ raise ValueError("file_path must be a non-empty string or path-like.")
112
+ path = Path(file_path)
80
113
  payload = {}
114
+ payload["is_sdk"] = True
81
115
  if base_url:
82
116
  payload["base_url"] = base_url
83
117
  try:
@@ -85,7 +119,7 @@ class Poma:
85
119
  f"{self.base_api_url}/ingest",
86
120
  data=payload,
87
121
  files={
88
- "file": (Path(file_path).name, Path(file_path).read_bytes()),
122
+ "file": (path.name, path.read_bytes()),
89
123
  },
90
124
  )
91
125
  response.raise_for_status()
@@ -93,10 +127,10 @@ class Poma:
93
127
  status = error.response.status_code
94
128
  if status in (401, 403):
95
129
  raise AuthenticationError(
96
- f"Failed to submit file '{file_path}': authentication error"
130
+ f"Failed to submit file '{path}': authentication error"
97
131
  ) from error
98
132
  raise RemoteServerError(
99
- f"Failed to submit file '{file_path}': {status}"
133
+ f"Failed to submit file '{path}': {status}"
100
134
  ) from error
101
135
  try:
102
136
  data = response.json()
@@ -115,6 +149,7 @@ class Poma:
115
149
  max_interval: float = 15.0,
116
150
  show_progress: bool = False,
117
151
  download_dir: str | os.PathLike[str] | None = None,
152
+ filename: str | None = None,
118
153
  ) -> dict[str, Any]:
119
154
  """
120
155
  Poll POMA for the result of a chunking job until completion.
@@ -130,15 +165,22 @@ class Poma:
130
165
  show_progress (bool, default=False):
131
166
  If True, logs progress messages during polling.
132
167
  download_dir (str | os.PathLike[str], optional):
133
- Directory to save the downloaded file in. Required if return_bytes=False.
134
- return_bytes (bool, default=False):
135
- If True, returns the file content as bytes instead of saving to disk.
168
+ Directory to save the downloaded file in. If neither download_dir nor
169
+ filename is set, the result is returned in memory (no file saved). If
170
+ filename is set but download_dir is not, the file is saved in the
171
+ current directory.
172
+ filename (str, optional):
173
+ Name for the saved .poma file. If it does not end with ``.poma``, that
174
+ suffix is appended. If not set when saving to disk, uses the server
175
+ filename when provided, otherwise ``{job_id}.poma``.
136
176
  Returns:
137
177
  The JSON result containing at least the keys `chunks` and `chunksets`.
138
178
 
139
179
  """
140
180
  time.sleep(initial_delay)
141
181
  current_interval = poll_interval
182
+ last_status = None
183
+
142
184
  while True:
143
185
  time.sleep(current_interval)
144
186
  try:
@@ -154,17 +196,26 @@ class Poma:
154
196
  "Failed to receive download URL from server."
155
197
  )
156
198
 
157
- if download_dir is None:
199
+ if download_dir is None and filename is None:
158
200
  # Return bytes content instead of saving to file
159
201
  file_bytes = self.download_bytes(download_url)
160
202
  return self.extract_chunks_and_chunksets_from_poma_archive(
161
203
  poma_archive_data=file_bytes
162
204
  )
163
205
  else:
164
- # Save downloaded file to directory
165
- filename = download.get("filename", "downloaded_file.poma")
206
+ # Save downloaded file (to download_dir or current dir if only filename set)
207
+ save_filename = (
208
+ filename or download.get("filename") or f"{job_id}.poma"
209
+ )
210
+ if not save_filename.endswith(".poma"):
211
+ save_filename = f"{save_filename}.poma"
212
+ save_dir = (
213
+ download_dir
214
+ if download_dir not in (None, "")
215
+ else "."
216
+ )
166
217
  downloaded_file_path = self.download_file(
167
- download_url, filename, save_directory=download_dir
218
+ download_url, save_filename, save_directory=save_dir
168
219
  )
169
220
  return self.extract_chunks_and_chunksets_from_poma_archive(
170
221
  poma_archive_path=downloaded_file_path
@@ -181,9 +232,18 @@ class Poma:
181
232
  elif status == "processing":
182
233
  if show_progress:
183
234
  print(f"Job {job_id} is still processing...")
235
+ if last_status == "pending":
236
+ current_interval = poll_interval
237
+ current_interval = min(current_interval * 1.5, max_interval)
238
+ elif status == "pending":
239
+ if show_progress:
240
+ print(
241
+ f"Job {job_id} is pending (queued due to rate limiting, sequential processing - common on demo accounts)..."
242
+ )
184
243
  current_interval = min(current_interval * 1.5, max_interval)
185
244
  else:
186
245
  raise InvalidResponseError(f"Unexpected job status: {status}")
246
+ last_status = status
187
247
  except httpx.HTTPStatusError as error:
188
248
  raise RemoteServerError(
189
249
  f"HTTP error: {error.response.status_code} {error.response.text}"
@@ -197,37 +257,18 @@ class Poma:
197
257
  poma_archive_path: str | os.PathLike[str] | None = None,
198
258
  ) -> dict[str, Any]:
199
259
  """
200
- Extract POMA archive file.
201
- POMA archive file is a zip file containing the chunks.json and chunksets.json files.
260
+ Extract chunks and chunksets from a POMA archive; delegates to module-level function.
261
+ POMA archive is a zip file containing chunks.json and chunksets.json.
202
262
  Args:
203
- poma_archive (bytes): The POMA archive file.
263
+ poma_archive_data: The POMA archive as bytes.
264
+ poma_archive_path: Path to the POMA archive file.
204
265
  Returns:
205
- dict: A dictionary containing the chunks and chunksets.
266
+ dict: A dictionary with ``chunks`` and ``chunksets`` keys.
206
267
  """
207
-
208
- # Load the chunks and chunksets from POMA archive
209
- chunks = None
210
- chunksets = None
211
- if poma_archive_path:
212
- with zipfile.ZipFile(poma_archive_path, "r") as zip_ref:
213
- chunks = zip_ref.read("chunks.json")
214
- chunksets = zip_ref.read("chunksets.json")
215
- elif poma_archive_data:
216
- with zipfile.ZipFile(io.BytesIO(poma_archive_data), "r") as zip_ref:
217
- chunks = zip_ref.read("chunks.json")
218
- chunksets = zip_ref.read("chunksets.json")
219
- else:
220
- raise ValueError(
221
- "Either poma_archive_data or poma_archive_path must be provided."
222
- )
223
-
224
- # Sanity check
225
- if not chunks or not chunksets:
226
- raise KeyError("Result must contain 'chunks' and 'chunksets' keys.")
227
-
228
- # Load the chunks and chunksets
229
- json_result = {"chunks": json.loads(chunks), "chunksets": json.loads(chunksets)}
230
- return json_result
268
+ return extract_chunks_and_chunksets_from_poma_archive(
269
+ poma_archive_data=poma_archive_data,
270
+ poma_archive_path=poma_archive_path,
271
+ )
231
272
 
232
273
  def create_cheatsheet(
233
274
  self,
@@ -301,14 +342,16 @@ class Poma:
301
342
  if not filename:
302
343
  filename = Path(download_url).name or "downloaded_file"
303
344
 
304
- # Determine save directory
345
+ # Determine save directory (default current directory so path has a parent)
305
346
  if save_directory:
306
347
  save_path = Path(save_directory) / filename
307
348
  else:
308
- save_path = Path(filename)
349
+ save_path = Path(".") / filename
309
350
 
310
- # Create the directory if it doesn't exist
311
- os.makedirs(os.path.dirname(save_path), exist_ok=True)
351
+ # Create the directory if it doesn't exist (skip when file is in cwd)
352
+ parent = os.path.dirname(save_path)
353
+ if parent:
354
+ os.makedirs(parent, exist_ok=True)
312
355
 
313
356
  # Download the file data
314
357
  content = self.download_bytes(download_url)
File without changes
@@ -0,0 +1,12 @@
1
+ from .langchain_poma import (
2
+ PomaFileLoader,
3
+ PomaChunksetSplitter,
4
+ PomaCheatsheetRetrieverLC,
5
+ )
6
+
7
+
8
+ __all__ = [
9
+ "PomaFileLoader",
10
+ "PomaChunksetSplitter",
11
+ "PomaCheatsheetRetrieverLC",
12
+ ]
@@ -1,9 +1,3 @@
1
- from .langchain_poma import (
2
- PomaFileLoader,
3
- PomaChunksetSplitter,
4
- PomaCheatsheetRetrieverLC,
5
- )
6
-
7
1
  from .llamaindex_poma import (
8
2
  PomaFileReader,
9
3
  PomaChunksetNodeParser,
@@ -11,9 +5,6 @@ from .llamaindex_poma import (
11
5
  )
12
6
 
13
7
  __all__ = [
14
- "PomaFileLoader",
15
- "PomaChunksetSplitter",
16
- "PomaCheatsheetRetrieverLC",
17
8
  "PomaFileReader",
18
9
  "PomaChunksetNodeParser",
19
10
  "PomaCheatsheetRetrieverLI",
@@ -0,0 +1,23 @@
1
+ from .qdrant_poma import (
2
+ PomaQdrant,
3
+ QdrantConfig,
4
+ VectorConfig,
5
+ InferenceConfig,
6
+ QdrantResponseError,
7
+ SearchResult,
8
+ chunk_uuid_string,
9
+ DenseEmbedSync,
10
+ SparseEmbedSync,
11
+ )
12
+
13
+ __all__ = [
14
+ "PomaQdrant",
15
+ "QdrantConfig",
16
+ "VectorConfig",
17
+ "InferenceConfig",
18
+ "QdrantResponseError",
19
+ "SearchResult",
20
+ "chunk_uuid_string",
21
+ "DenseEmbedSync",
22
+ "SparseEmbedSync",
23
+ ]