chunkr-ai 0.0.48__tar.gz → 0.0.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {chunkr_ai-0.0.48/src/chunkr_ai.egg-info → chunkr_ai-0.0.49}/PKG-INFO +1 -1
  2. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/pyproject.toml +1 -1
  3. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/configuration.py +1 -0
  4. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49/src/chunkr_ai.egg-info}/PKG-INFO +1 -1
  5. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/tests/test_chunkr.py +66 -13
  6. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/LICENSE +0 -0
  7. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/README.md +0 -0
  8. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/setup.cfg +0 -0
  9. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/__init__.py +0 -0
  10. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/__init__.py +0 -0
  11. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/auth.py +0 -0
  12. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/chunkr.py +0 -0
  13. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/chunkr_base.py +0 -0
  14. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/decorators.py +0 -0
  15. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/misc.py +0 -0
  16. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/protocol.py +0 -0
  17. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/api/task_response.py +0 -0
  18. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai/models.py +0 -0
  19. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/SOURCES.txt +0 -0
  20. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/dependency_links.txt +0 -0
  21. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/requires.txt +0 -0
  22. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/src/chunkr_ai.egg-info/top_level.txt +0 -0
  23. {chunkr_ai-0.0.48 → chunkr_ai-0.0.49}/tests/test_file_handling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.0.48
3
+ Version: 0.0.49
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunkr-ai"
7
- version = "0.0.48"
7
+ version = "0.0.49"
8
8
  authors = [{"name" = "Ishaan Kapoor", "email" = "ishaan@lumina.sh"}]
9
9
  description = "Python client for Chunkr: open source document intelligence"
10
10
  readme = "README.md"
@@ -23,6 +23,7 @@ class GenerationConfig(BaseModel):
23
23
  markdown: Optional[GenerationStrategy] = None
24
24
  crop_image: Optional[CroppingStrategy] = None
25
25
  embed_sources: Optional[List[EmbedSource]] = Field(default_factory=lambda: [EmbedSource.MARKDOWN])
26
+ extended_context: Optional[bool] = None
26
27
 
27
28
  class SegmentProcessing(BaseModel):
28
29
  model_config = ConfigDict(populate_by_name=True, alias_generator=str.title)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunkr-ai
3
- Version: 0.0.48
3
+ Version: 0.0.49
4
4
  Summary: Python client for Chunkr: open source document intelligence
5
5
  Author-email: Ishaan Kapoor <ishaan@lumina.sh>
6
6
  License: MIT License
@@ -2,27 +2,25 @@ import pytest
2
2
  from pathlib import Path
3
3
  from PIL import Image
4
4
  import asyncio
5
- import base64
6
- import io
7
- import tempfile
8
5
  from typing import Awaitable
9
6
 
10
7
  from chunkr_ai import Chunkr
11
8
  from chunkr_ai.models import (
9
+ ChunkProcessing,
12
10
  Configuration,
13
- GenerationStrategy,
11
+ EmbedSource,
12
+ ErrorHandlingStrategy,
13
+ FallbackStrategy,
14
14
  GenerationConfig,
15
+ GenerationStrategy,
16
+ LlmProcessing,
15
17
  OcrStrategy,
16
18
  Pipeline,
17
19
  SegmentationStrategy,
18
20
  SegmentProcessing,
19
- ChunkProcessing,
21
+ Status,
20
22
  TaskResponse,
21
- EmbedSource,
22
- ErrorHandlingStrategy,
23
23
  Tokenizer,
24
- LlmProcessing,
25
- FallbackStrategy,
26
24
  )
27
25
 
28
26
  @pytest.fixture
@@ -167,6 +165,21 @@ def model_fallback_config():
167
165
  ),
168
166
  )
169
167
 
168
+ @pytest.fixture
169
+ def extended_context_config():
170
+ return Configuration(
171
+ segment_processing=SegmentProcessing(
172
+ picture=GenerationConfig(
173
+ extended_context=True,
174
+ html=GenerationStrategy.LLM,
175
+ ),
176
+ table=GenerationConfig(
177
+ extended_context=True,
178
+ html=GenerationStrategy.LLM,
179
+ )
180
+ ),
181
+ )
182
+
170
183
  @pytest.mark.asyncio
171
184
  async def test_ocr_auto(client, sample_path):
172
185
  response = await client.upload(sample_path, Configuration(ocr_strategy=OcrStrategy.AUTO))
@@ -265,9 +278,18 @@ async def test_cancel_task(client, sample_path):
265
278
  @pytest.mark.asyncio
266
279
  async def test_cancel_task_direct(client, sample_path):
267
280
  task = await client.create_task(sample_path)
268
- assert isinstance(task, Awaitable) and isinstance(task, TaskResponse)
269
281
  assert task.status == "Starting"
270
- await task.cancel()
282
+ try:
283
+ await task.cancel()
284
+ except Exception as e:
285
+ task = await client.get_task(task.task_id)
286
+ print(task.status)
287
+ if task.status == Status.PROCESSING:
288
+ print("Task is processing, so it can't be cancelled")
289
+ assert True
290
+ else:
291
+ print("Task status:", task.status)
292
+ raise e
271
293
  assert task.status == "Cancelled"
272
294
 
273
295
  @pytest.mark.asyncio
@@ -304,6 +326,7 @@ async def test_pipeline_type_azure(client, sample_path):
304
326
  assert response.task_id is not None
305
327
  assert response.status == "Succeeded"
306
328
  assert response.output is not None
329
+ assert response.configuration.pipeline == Pipeline.AZURE
307
330
 
308
331
  @pytest.mark.asyncio
309
332
  async def test_pipeline_type_chunkr(client, sample_path):
@@ -311,7 +334,8 @@ async def test_pipeline_type_chunkr(client, sample_path):
311
334
  assert response.task_id is not None
312
335
  assert response.status == "Succeeded"
313
336
  assert response.output is not None
314
-
337
+ assert response.configuration.pipeline == Pipeline.CHUNKR
338
+
315
339
  @pytest.mark.asyncio
316
340
  async def test_client_lifecycle(client, sample_path):
317
341
  response1 = await client.upload(sample_path)
@@ -528,4 +552,33 @@ async def test_fallback_strategy_serialization():
528
552
  # Test string representation
529
553
  assert str(none_strategy) == "None"
530
554
  assert str(default_strategy) == "Default"
531
- assert str(model_strategy) == "Model(gpt-4.1)"
555
+ assert str(model_strategy) == "Model(gpt-4.1)"
556
+
557
+ @pytest.mark.asyncio
558
+ async def test_extended_context(client, sample_path, extended_context_config):
559
+ """Tests uploading with extended context enabled for pictures and tables."""
560
+ print("\nTesting extended context for Pictures and Tables...")
561
+ try:
562
+ task = await client.upload(sample_path, config=extended_context_config)
563
+ print(f"Task created with extended context config: {task.task_id}")
564
+ print(f"Initial Status: {task.status}")
565
+
566
+ # Poll the task until it finishes or fails
567
+ print(f"Final Status: {task.status}")
568
+ print(f"Message: {task.message}")
569
+
570
+ # Basic assertion: Check if the task completed (either succeeded or failed)
571
+ assert task.status in [Status.SUCCEEDED, Status.FAILED], f"Task ended in unexpected state: {task.status}"
572
+
573
+ # More specific assertions based on expected outcomes with your local server
574
+ # if task.status == Status.FAILED:
575
+ # assert "context_length_exceeded" in task.message, "Expected context length error"
576
+ # elif task.status == Status.SUCCEEDED:
577
+ # # Check if output reflects extended context usage if possible
578
+ # pass
579
+
580
+ print("Extended context test completed.")
581
+
582
+ except Exception as e:
583
+ print(f"Error during extended context test: {e}")
584
+ raise # Re-raise the exception to fail the test explicitly
File without changes
File without changes
File without changes