chunkr-ai 0.0.48__tar.gz → 0.0.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {chunkr_ai-0.0.48/src/chunkr_ai.egg-info → chunkr_ai-0.0.50}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/configuration.py +1 -0
  4. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  5. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/tests/test_chunkr.py +68 -14
  6. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/LICENSE +0 -0
  7. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/README.md +0 -0
  8. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/setup.cfg +0 -0
  9. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/__init__.py +0 -0
  10. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/__init__.py +0 -0
  11. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/auth.py +0 -0
  12. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/chunkr.py +0 -0
  13. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/chunkr_base.py +0 -0
  14. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/decorators.py +0 -0
  15. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/misc.py +0 -0
  16. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/protocol.py +0 -0
  17. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/api/task_response.py +0 -0
  18. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai/models.py +0 -0
  19. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/src/chunkr_ai.egg-info/top_level.txt +0 -0
  23. {chunkr_ai-0.0.48 → chunkr_ai-0.0.50}/tests/test_file_handling.py +0 -0
chunkr_ai-0.0.50/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunkr-ai
-Version: 0.0.48
+Version: 0.0.50
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License
chunkr_ai-0.0.50/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "chunkr-ai"
-version = "0.0.48"
+version = "0.0.50"
 authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
 description = "Python client for Chunkr: open source document intelligence"
 readme = "README.md"
chunkr_ai-0.0.50/src/chunkr_ai/api/configuration.py
@@ -23,6 +23,7 @@ class GenerationConfig(BaseModel):
     markdown: Optional[GenerationStrategy] = None
     crop_image: Optional[CroppingStrategy] = None
     embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
+    extended_context: Optional[bool] = None
 
 class SegmentProcessing(BaseModel):
     model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
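For reference, the new extended_context field on GenerationConfig is enabled per segment type through SegmentProcessing. The sketch below mirrors the extended_context_config fixture added to tests/test_chunkr.py further down in this diff; constructing Chunkr() with credentials taken from the environment and the document path are illustrative assumptions, not shown in this diff.

import asyncio

from chunkr_ai import Chunkr
from chunkr_ai.models import (
    Configuration,
    GenerationConfig,
    GenerationStrategy,
    SegmentProcessing,
)

# Enable the new extended_context flag for picture and table segments,
# matching the extended_context_config fixture in tests/test_chunkr.py.
config = Configuration(
    segment_processing=SegmentProcessing(
        picture=GenerationConfig(html=GenerationStrategy.LLM, extended_context=True),
        table=GenerationConfig(html=GenerationStrategy.LLM, extended_context=True),
    ),
)

async def main():
    # Assumption: Chunkr() reads the API key from the environment.
    client = Chunkr()
    # Placeholder path; the tests upload a sample file the same way.
    task = await client.upload("path/to/document.pdf", config)
    print(task.status)

asyncio.run(main())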
chunkr_ai-0.0.50/src/chunkr_ai.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunkr-ai
-Version: 0.0.48
+Version: 0.0.50
 Summary: Python client for Chunkr: open source document intelligence
 Author-email: Ishaan Kapoor <ishaan@lumina.sh>
 License: MIT License
chunkr_ai-0.0.50/tests/test_chunkr.py
@@ -2,27 +2,25 @@ import pytest
 from pathlib import Path
 from PIL import Image
 import asyncio
-import base64
-import io
-import tempfile
 from typing import Awaitable
 
 from chunkr_ai import Chunkr
 from chunkr_ai.models import (
+    ChunkProcessing,
     Configuration,
-    GenerationStrategy,
+    EmbedSource,
+    ErrorHandlingStrategy,
+    FallbackStrategy,
     GenerationConfig,
+    GenerationStrategy,
+    LlmProcessing,
     OcrStrategy,
     Pipeline,
     SegmentationStrategy,
     SegmentProcessing,
-    ChunkProcessing,
+    Status,
     TaskResponse,
-    EmbedSource,
-    ErrorHandlingStrategy,
     Tokenizer,
-    LlmProcessing,
-    FallbackStrategy,
 )
 
 @pytest.fixture
@@ -167,6 +165,21 @@ def model_fallback_config():
         ),
     )
 
+@pytest.fixture
+def extended_context_config():
+    return Configuration(
+        segment_processing=SegmentProcessing(
+            picture=GenerationConfig(
+                extended_context=True,
+                html=GenerationStrategy.LLM,
+            ),
+            table=GenerationConfig(
+                extended_context=True,
+                html=GenerationStrategy.LLM,
+            )
+        ),
+    )
+
 @pytest.mark.asyncio
 async def test_ocr_auto(client, sample_path):
     response = await client.upload(sample_path, Configuration(ocr_strategy=OcrStrategy.AUTO))
@@ -265,9 +278,18 @@ async def test_cancel_task(client, sample_path):
 @pytest.mark.asyncio
 async def test_cancel_task_direct(client, sample_path):
     task = await client.create_task(sample_path)
-    assert isinstance(task, Awaitable) and isinstance(task, TaskResponse)
     assert task.status == "Starting"
-    await task.cancel()
+    try:
+        await task.cancel()
+    except Exception as e:
+        task = await client.get_task(task.task_id)
+        print(task.status)
+        if task.status == Status.PROCESSING:
+            print("Task is processing, so it can't be cancelled")
+            assert True
+        else:
+            print("Task status:", task.status)
+            raise e
     assert task.status == "Cancelled"
 
 @pytest.mark.asyncio
@@ -293,7 +315,8 @@ async def test_update_task_direct(client, sample_path):
         segmentation_strategy=SegmentationStrategy.PAGE,
     )
     task = await client.upload(sample_path, original_config)
-    task = await task.update(new_config)
+    task = await (await task.update(new_config))
+    assert isinstance(task, TaskResponse)
     assert task.status == "Succeeded"
     assert task.output is not None
     assert task.configuration.segmentation_strategy == SegmentationStrategy.PAGE
@@ -304,6 +327,7 @@ async def test_pipeline_type_azure(client, sample_path):
     assert response.task_id is not None
     assert response.status == "Succeeded"
     assert response.output is not None
+    assert response.configuration.pipeline == Pipeline.AZURE
 
 @pytest.mark.asyncio
 async def test_pipeline_type_chunkr(client, sample_path):
@@ -311,7 +335,8 @@ async def test_pipeline_type_chunkr(client, sample_path):
     assert response.task_id is not None
     assert response.status == "Succeeded"
     assert response.output is not None
-
+    assert response.configuration.pipeline == Pipeline.CHUNKR
+
 @pytest.mark.asyncio
 async def test_client_lifecycle(client, sample_path):
     response1 = await client.upload(sample_path)
@@ -528,4 +553,33 @@ async def test_fallback_strategy_serialization():
     # Test string representation
     assert str(none_strategy) == "None"
     assert str(default_strategy) == "Default"
-    assert str(model_strategy) == "Model(gpt-4.1)"
+    assert str(model_strategy) == "Model(gpt-4.1)"
+
+@pytest.mark.asyncio
+async def test_extended_context(client, sample_path, extended_context_config):
+    """Tests uploading with extended context enabled for pictures and tables."""
+    print("\nTesting extended context for Pictures and Tables...")
+    try:
+        task = await client.upload(sample_path, config=extended_context_config)
+        print(f"Task created with extended context config: {task.task_id}")
+        print(f"Initial Status: {task.status}")
+
+        # Poll the task until it finishes or fails
+        print(f"Final Status: {task.status}")
+        print(f"Message: {task.message}")
+
+        # Basic assertion: Check if the task completed (either succeeded or failed)
+        assert task.status in [Status.SUCCEEDED, Status.FAILED], f"Task ended in unexpected state: {task.status}"
+
+        # More specific assertions based on expected outcomes with your local server
+        # if task.status == Status.FAILED:
+        #     assert "context_length_exceeded" in task.message, "Expected context length error"
+        # elif task.status == Status.SUCCEEDED:
+        #     # Check if output reflects extended context usage if possible
+        #     pass
+
+        print("Extended context test completed.")
+
+    except Exception as e:
+        print(f"Error during extended context test: {e}")
+        raise # Re-raise the exception to fail the test explicitly