doctra 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,10 +22,10 @@ def make_model(
22
22
  """
23
23
  Build a callable Outlines model for VLM processing.
24
24
 
25
- Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, OpenRouter, or Ollama
25
+ Creates an Outlines model instance configured for Gemini, OpenAI, Anthropic, OpenRouter, Qianfan, or Ollama
26
26
  providers. Only one backend is active at a time, with Gemini as the default.
27
27
 
28
- :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", or "ollama", default: "gemini")
28
+ :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", "qianfan", or "ollama", default: "gemini")
29
29
  :param vlm_model: Model name to use (defaults to provider-specific defaults)
30
30
  :param api_key: API key for the VLM provider (required for all providers except Ollama)
31
31
  :return: Configured Outlines model instance
@@ -43,6 +43,8 @@ def make_model(
43
43
  vlm_model = "claude-opus-4-1"
44
44
  elif vlm_provider == "openrouter":
45
45
  vlm_model = "x-ai/grok-4"
46
+ elif vlm_provider == "qianfan":
47
+ vlm_model = "ernie-4.5-turbo-vl-32k"
46
48
  elif vlm_provider == "ollama":
47
49
  vlm_model = "llava:latest"
48
50
 
@@ -87,11 +89,24 @@ def make_model(
87
89
  vlm_model
88
90
  )
89
91
 
92
+ if vlm_provider == "qianfan":
93
+ if not api_key:
94
+ raise ValueError("Qianfan provider requires api_key to be passed to make_model(...).")
95
+ # Create the Qianfan client with OpenAI-compatible interface
96
+ client = openai.OpenAI(
97
+ base_url="https://qianfan.baidubce.com/v2",
98
+ api_key=api_key,
99
+ )
100
+ return outlines.from_openai(
101
+ client,
102
+ vlm_model
103
+ )
104
+
90
105
  if vlm_provider == "ollama":
91
106
  # Ollama doesn't use Outlines, so we return a custom wrapper
92
107
  return OllamaModelWrapper(vlm_model)
93
108
 
94
- raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', 'openrouter', or 'ollama'.")
109
+ raise ValueError(f"Unsupported provider: {vlm_provider}. Use 'gemini', 'openai', 'anthropic', 'openrouter', 'qianfan', or 'ollama'.")
95
110
 
96
111
 
97
112
  class OllamaModelWrapper:
@@ -32,7 +32,7 @@ class VLMStructuredExtractor:
32
32
  """
33
33
  Initialize the VLMStructuredExtractor with provider configuration.
34
34
 
35
- :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", or "openrouter", default: "gemini")
35
+ :param vlm_provider: VLM provider to use ("gemini", "openai", "anthropic", "openrouter", "qianfan", or "ollama", default: "gemini")
36
36
  :param vlm_model: Model name to use (defaults to provider-specific defaults)
37
37
  :param api_key: API key for the VLM provider (required for all providers except Ollama)
38
38
  """
doctra/version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """Version information for Doctra."""
2
- __version__ = '0.5.0'
2
+ __version__ = '0.5.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: doctra
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Parse, extract, and analyze documents with ease
5
5
  Home-page: https://github.com/AdemBoukhris457/Doctra
6
6
  Author: Adem Boukhris
@@ -362,7 +362,7 @@ parser = StructuredPDFParser()
362
362
  # Parser with VLM for structured data extraction
363
363
  parser = StructuredPDFParser(
364
364
  use_vlm=True,
365
- vlm_provider="openai", # or "gemini" or "anthropic" or "openrouter"
365
+ vlm_provider="openai", # or "gemini", "anthropic", "openrouter", "qianfan", "ollama"
366
366
  vlm_api_key="your_api_key_here"
367
367
  )
368
368
 
@@ -917,7 +917,7 @@ parser.display_pages_with_boxes("document.pdf")
917
917
 
918
918
  ### 🤖 VLM Integration
919
919
  - Vision Language Model support for structured data extraction
920
- - Multiple provider options (OpenAI, Gemini, Anthropic, OpenRouter)
920
+ - Multiple provider options (OpenAI, Gemini, Anthropic, OpenRouter, Qianfan, Ollama)
921
921
  - Automatic conversion of charts and tables to structured formats
922
922
 
923
923
  ### 📊 Multiple Output Formats
@@ -1,5 +1,5 @@
1
1
  doctra/__init__.py,sha256=rNLCyODOpaPb_TTP6qmQnuWZJW9JPXrxg1IfKnvb1No,773
2
- doctra/version.py,sha256=A8O6Kr44VM50_wm5hKwwcjBUZJFPAk64i5o7DatMRlQ,62
2
+ doctra/version.py,sha256=b0vGLL2RHYHeqdwkHTZmk2FRb1-xEhcO1auAArG969s,62
3
3
  doctra/cli/__init__.py,sha256=4PTujjYRShOOUlZ7PwuWckShPWLC4v4CYIhJpzgyv1k,911
4
4
  doctra/cli/main.py,sha256=UhWTatY3qIeutZzVo9syLG2srbs8MZuGaLo5tk9xC_M,43108
5
5
  doctra/cli/utils.py,sha256=GKSSGi-JjNXufNekqCysSev7St1t32caYMduy0Tq96s,11971
@@ -15,8 +15,8 @@ doctra/engines/ocr/path_resolver.py,sha256=2_7Nsekt3dCDU3oVsgdr62iMrlAhbGNfYwgh4
15
15
  doctra/engines/ocr/pytesseract_engine.py,sha256=Imz2uwju6himkBiS8CH7DLxBRe-LtmMYZiOdb_6PoQw,2911
16
16
  doctra/engines/vlm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  doctra/engines/vlm/outlines_types.py,sha256=fQK6ru7XiXHaa8JPpaTTBaTk_zQ93ZyhFp4SyAnUdVU,1337
18
- doctra/engines/vlm/provider.py,sha256=lXQJNxDTxBHSxuEMbF37PjETEokR9o7rc2jLWEH9RnU,9943
19
- doctra/engines/vlm/service.py,sha256=nygxMe7uTq6Bv70ycBPL59F2a0ESp1Hix4j833p6rUM,4343
18
+ doctra/engines/vlm/provider.py,sha256=QMr-gcbhyXgTQOHPIjIrmsLTNfkbDR69I3uR5Z2QVU0,10521
19
+ doctra/engines/vlm/service.py,sha256=8o3JbNEkAFLNxSyu3KW7srI25PSLY-epzNZquKTxgcU,4364
20
20
  doctra/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  doctra/exporters/excel_writer.py,sha256=rwyqlH73P7z413BELovQY_pS6IMkkqHEho6mbPrJ2Sk,11857
22
22
  doctra/exporters/html_writer.py,sha256=zJPoMiFF9lx9fHpdqk0y8diNNeQVC68wNvUInX918fY,46017
@@ -66,9 +66,9 @@ doctra/utils/pdf_io.py,sha256=c8EY47Z1iqVtlLFHS_n0qGuXJ5ERFaMUd84ivXV0b9E,706
66
66
  doctra/utils/progress.py,sha256=BD9YZqYLZw6yohQnyUV3w9QsQuiIrXM_EqByOSSJsDU,11912
67
67
  doctra/utils/quiet.py,sha256=5XPS-1CtJ0sVk6qgSQctdhr_wR8mP1xoJLoUbmkXROA,387
68
68
  doctra/utils/structured_utils.py,sha256=vU84dsD8wIlTyMsA9hitorGH-eroQiVuWEpBTQBUT24,1478
69
- doctra-0.5.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
70
- doctra-0.5.0.dist-info/METADATA,sha256=tall4Spu8hFtNARaVVCNl9QedT-4VUubsV4oqrMLxoc,37168
71
- doctra-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
- doctra-0.5.0.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
73
- doctra-0.5.0.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
74
- doctra-0.5.0.dist-info/RECORD,,
69
+ doctra-0.5.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
70
+ doctra-0.5.1.dist-info/METADATA,sha256=IInFIxxklcgLQHTvStUSTkqQXwXGly0JbZOSpBQAu0A,37202
71
+ doctra-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
+ doctra-0.5.1.dist-info/entry_points.txt,sha256=4G2RHamA0llCiIXaQQm8EDkVK9JNGKbI7uDnXVFgIaY,47
73
+ doctra-0.5.1.dist-info/top_level.txt,sha256=jI7E8jHci2gP9y0GYaWxlg9jG0O5n3FjHJJPLXDXMds,7
74
+ doctra-0.5.1.dist-info/RECORD,,
File without changes