gemina 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. gemina-0.1.0/LICENSE +21 -0
  2. gemina-0.1.0/PKG-INFO +298 -0
  3. gemina-0.1.0/README.md +267 -0
  4. gemina-0.1.0/pyproject.toml +52 -0
  5. gemina-0.1.0/setup.cfg +4 -0
  6. gemina-0.1.0/src/gemina/__init__.py +77 -0
  7. gemina-0.1.0/src/gemina/_version.py +8 -0
  8. gemina-0.1.0/src/gemina/errors.py +51 -0
  9. gemina-0.1.0/src/gemina/generated/__init__.py +314 -0
  10. gemina-0.1.0/src/gemina/generated/api/__init__.py +18 -0
  11. gemina-0.1.0/src/gemina/generated/api/billing_api.py +1086 -0
  12. gemina-0.1.0/src/gemina/generated/api/chat_api.py +314 -0
  13. gemina-0.1.0/src/gemina/generated/api/contact_api.py +309 -0
  14. gemina-0.1.0/src/gemina/generated/api/document_api.py +8371 -0
  15. gemina-0.1.0/src/gemina/generated/api/enterprise_api.py +879 -0
  16. gemina-0.1.0/src/gemina/generated/api/file_tag_api.py +1124 -0
  17. gemina-0.1.0/src/gemina/generated/api/files_api.py +886 -0
  18. gemina-0.1.0/src/gemina/generated/api/free_tier_api.py +281 -0
  19. gemina-0.1.0/src/gemina/generated/api/legal_api.py +280 -0
  20. gemina-0.1.0/src/gemina/generated/api/meta_api.py +277 -0
  21. gemina-0.1.0/src/gemina/generated/api/retrieval_api.py +841 -0
  22. gemina-0.1.0/src/gemina/generated/api/sessions_api.py +313 -0
  23. gemina-0.1.0/src/gemina/generated/api/subscriptions_api.py +2681 -0
  24. gemina-0.1.0/src/gemina/generated/api/templates_api.py +3376 -0
  25. gemina-0.1.0/src/gemina/generated/api_client.py +807 -0
  26. gemina-0.1.0/src/gemina/generated/api_response.py +21 -0
  27. gemina-0.1.0/src/gemina/generated/configuration.py +632 -0
  28. gemina-0.1.0/src/gemina/generated/exceptions.py +218 -0
  29. gemina-0.1.0/src/gemina/generated/models/__init__.py +136 -0
  30. gemina-0.1.0/src/gemina/generated/models/aggregate_meta_dto.py +90 -0
  31. gemina-0.1.0/src/gemina/generated/models/aggregate_metric_dto.py +112 -0
  32. gemina-0.1.0/src/gemina/generated/models/aggregate_row_dto.py +103 -0
  33. gemina-0.1.0/src/gemina/generated/models/chat_query_in_dto.py +96 -0
  34. gemina-0.1.0/src/gemina/generated/models/chat_query_out_dto.py +118 -0
  35. gemina-0.1.0/src/gemina/generated/models/checkout_out_dto.py +118 -0
  36. gemina-0.1.0/src/gemina/generated/models/column_definition_dto.py +96 -0
  37. gemina-0.1.0/src/gemina/generated/models/comparison_summary_model.py +138 -0
  38. gemina-0.1.0/src/gemina/generated/models/contact_form_in_dto.py +100 -0
  39. gemina-0.1.0/src/gemina/generated/models/contract_out_dto.py +171 -0
  40. gemina-0.1.0/src/gemina/generated/models/contract_status.py +38 -0
  41. gemina-0.1.0/src/gemina/generated/models/count_breakdown_model.py +106 -0
  42. gemina-0.1.0/src/gemina/generated/models/create_template_in_dto.py +102 -0
  43. gemina-0.1.0/src/gemina/generated/models/credit_meter.py +37 -0
  44. gemina-0.1.0/src/gemina/generated/models/credit_meter_filter.py +38 -0
  45. gemina-0.1.0/src/gemina/generated/models/credit_transaction_list_out_dto.py +117 -0
  46. gemina-0.1.0/src/gemina/generated/models/credit_transaction_out_dto.py +142 -0
  47. gemina-0.1.0/src/gemina/generated/models/credit_transaction_type.py +44 -0
  48. gemina-0.1.0/src/gemina/generated/models/data_center_region_model.py +39 -0
  49. gemina-0.1.0/src/gemina/generated/models/data_retention_period_model.py +43 -0
  50. gemina-0.1.0/src/gemina/generated/models/document_data_out_dto.py +101 -0
  51. gemina-0.1.0/src/gemina/generated/models/document_file_type_model.py +38 -0
  52. gemina-0.1.0/src/gemina/generated/models/document_meta_out_dto.py +204 -0
  53. gemina-0.1.0/src/gemina/generated/models/document_primary_view_out_dto.py +123 -0
  54. gemina-0.1.0/src/gemina/generated/models/document_processing_meta_out_dto.py +218 -0
  55. gemina-0.1.0/src/gemina/generated/models/document_processing_result_out_dto.py +127 -0
  56. gemina-0.1.0/src/gemina/generated/models/document_purging_result_out_dto.py +122 -0
  57. gemina-0.1.0/src/gemina/generated/models/document_view_data_out_dto.py +101 -0
  58. gemina-0.1.0/src/gemina/generated/models/document_view_meta_out_dto.py +222 -0
  59. gemina-0.1.0/src/gemina/generated/models/document_view_out_dto.py +108 -0
  60. gemina-0.1.0/src/gemina/generated/models/documents_view_out_dto.py +127 -0
  61. gemina-0.1.0/src/gemina/generated/models/enterprise_transaction_list_out_dto.py +117 -0
  62. gemina-0.1.0/src/gemina/generated/models/enterprise_transaction_out_dto.py +148 -0
  63. gemina-0.1.0/src/gemina/generated/models/extraction_document_view_meta_out_dto.py +194 -0
  64. gemina-0.1.0/src/gemina/generated/models/extraction_meta_out_dto.py +158 -0
  65. gemina-0.1.0/src/gemina/generated/models/extraction_primary_view_out_dto.py +125 -0
  66. gemina-0.1.0/src/gemina/generated/models/extraction_processing_result_out_dto.py +104 -0
  67. gemina-0.1.0/src/gemina/generated/models/extraction_type_model.py +42 -0
  68. gemina-0.1.0/src/gemina/generated/models/extraction_validation_in_dto.py +88 -0
  69. gemina-0.1.0/src/gemina/generated/models/extraction_validation_meta_out_dto.py +117 -0
  70. gemina-0.1.0/src/gemina/generated/models/extraction_validation_result_out_dto.py +122 -0
  71. gemina-0.1.0/src/gemina/generated/models/extraction_view_data_out_dto.py +101 -0
  72. gemina-0.1.0/src/gemina/generated/models/extraction_view_meta_out_dto.py +194 -0
  73. gemina-0.1.0/src/gemina/generated/models/extraction_view_out_dto.py +110 -0
  74. gemina-0.1.0/src/gemina/generated/models/extractions_view_out_dto.py +127 -0
  75. gemina-0.1.0/src/gemina/generated/models/feature_type.py +39 -0
  76. gemina-0.1.0/src/gemina/generated/models/field_definition_dto.py +105 -0
  77. gemina-0.1.0/src/gemina/generated/models/field_difference_model.py +116 -0
  78. gemina-0.1.0/src/gemina/generated/models/field_type.py +39 -0
  79. gemina-0.1.0/src/gemina/generated/models/file_tag_balance_out_dto.py +97 -0
  80. gemina-0.1.0/src/gemina/generated/models/file_tag_filename_patterns_out_dto.py +98 -0
  81. gemina-0.1.0/src/gemina/generated/models/file_tag_metadata_out_dto.py +149 -0
  82. gemina-0.1.0/src/gemina/generated/models/file_tag_result_out_dto.py +118 -0
  83. gemina-0.1.0/src/gemina/generated/models/files_create_upload_in_dto.py +113 -0
  84. gemina-0.1.0/src/gemina/generated/models/files_next_tool_call_out_dto.py +102 -0
  85. gemina-0.1.0/src/gemina/generated/models/files_upload_details_out_dto.py +107 -0
  86. gemina-0.1.0/src/gemina/generated/models/files_upload_instructions_out_dto.py +100 -0
  87. gemina-0.1.0/src/gemina/generated/models/free_tier_config_out_dto.py +101 -0
  88. gemina-0.1.0/src/gemina/generated/models/generate_template_from_url_in_dto.py +89 -0
  89. gemina-0.1.0/src/gemina/generated/models/http_validation_error.py +96 -0
  90. gemina-0.1.0/src/gemina/generated/models/invoice_list_out_dto.py +117 -0
  91. gemina-0.1.0/src/gemina/generated/models/invoice_out_dto.py +137 -0
  92. gemina-0.1.0/src/gemina/generated/models/invoice_pdf_out_dto.py +103 -0
  93. gemina-0.1.0/src/gemina/generated/models/invoice_status.py +39 -0
  94. gemina-0.1.0/src/gemina/generated/models/invoice_type.py +38 -0
  95. gemina-0.1.0/src/gemina/generated/models/location_inner.py +138 -0
  96. gemina-0.1.0/src/gemina/generated/models/model_type.py +41 -0
  97. gemina-0.1.0/src/gemina/generated/models/next_action_out_dto.py +106 -0
  98. gemina-0.1.0/src/gemina/generated/models/number_of_stored_documents_out_dto.py +111 -0
  99. gemina-0.1.0/src/gemina/generated/models/operation_type.py +43 -0
  100. gemina-0.1.0/src/gemina/generated/models/period_preset_model.py +41 -0
  101. gemina-0.1.0/src/gemina/generated/models/plan_credit_cost_out_dto.py +129 -0
  102. gemina-0.1.0/src/gemina/generated/models/plan_detail_out_dto.py +173 -0
  103. gemina-0.1.0/src/gemina/generated/models/plan_list_out_dto.py +117 -0
  104. gemina-0.1.0/src/gemina/generated/models/plan_out_dto.py +163 -0
  105. gemina-0.1.0/src/gemina/generated/models/purge_reason_model.py +38 -0
  106. gemina-0.1.0/src/gemina/generated/models/purged_documents_data_out_dto.py +89 -0
  107. gemina-0.1.0/src/gemina/generated/models/query_meta_dto.py +99 -0
  108. gemina-0.1.0/src/gemina/generated/models/query_result_item_dto.py +211 -0
  109. gemina-0.1.0/src/gemina/generated/models/rate_card_out_dto.py +131 -0
  110. gemina-0.1.0/src/gemina/generated/models/response_change_plan.py +134 -0
  111. gemina-0.1.0/src/gemina/generated/models/response_status.py +41 -0
  112. gemina-0.1.0/src/gemina/generated/models/retrieval_aggregate_in_dto.py +123 -0
  113. gemina-0.1.0/src/gemina/generated/models/retrieval_aggregate_out_dto.py +121 -0
  114. gemina-0.1.0/src/gemina/generated/models/retrieval_filters_dto.py +207 -0
  115. gemina-0.1.0/src/gemina/generated/models/retrieval_query_in_dto.py +152 -0
  116. gemina-0.1.0/src/gemina/generated/models/retrieval_query_out_dto.py +121 -0
  117. gemina-0.1.0/src/gemina/generated/models/retrieval_status_out_dto.py +107 -0
  118. gemina-0.1.0/src/gemina/generated/models/session_token_in_dto.py +101 -0
  119. gemina-0.1.0/src/gemina/generated/models/session_token_out_dto.py +113 -0
  120. gemina-0.1.0/src/gemina/generated/models/sort_method_model.py +37 -0
  121. gemina-0.1.0/src/gemina/generated/models/subscribe_in_dto.py +89 -0
  122. gemina-0.1.0/src/gemina/generated/models/subscription_out_dto.py +177 -0
  123. gemina-0.1.0/src/gemina/generated/models/subscription_status.py +40 -0
  124. gemina-0.1.0/src/gemina/generated/models/support_level.py +38 -0
  125. gemina-0.1.0/src/gemina/generated/models/table_definition_dto_input.py +108 -0
  126. gemina-0.1.0/src/gemina/generated/models/tag_file_body_dto.py +101 -0
  127. gemina-0.1.0/src/gemina/generated/models/tag_url_in_dto.py +103 -0
  128. gemina-0.1.0/src/gemina/generated/models/template_list_out_dto.py +98 -0
  129. gemina-0.1.0/src/gemina/generated/models/template_out_dto.py +138 -0
  130. gemina-0.1.0/src/gemina/generated/models/template_schema_dto_input.py +116 -0
  131. gemina-0.1.0/src/gemina/generated/models/template_schema_dto_output.py +116 -0
  132. gemina-0.1.0/src/gemina/generated/models/template_status_model.py +39 -0
  133. gemina-0.1.0/src/gemina/generated/models/top_up_pack_list_out_dto.py +117 -0
  134. gemina-0.1.0/src/gemina/generated/models/top_up_pack_out_dto.py +126 -0
  135. gemina-0.1.0/src/gemina/generated/models/top_up_purchase_in_dto.py +89 -0
  136. gemina-0.1.0/src/gemina/generated/models/top_up_purchase_out_dto.py +115 -0
  137. gemina-0.1.0/src/gemina/generated/models/tos_out_dto.py +95 -0
  138. gemina-0.1.0/src/gemina/generated/models/update_template_in_dto.py +112 -0
  139. gemina-0.1.0/src/gemina/generated/models/usage_breakdown_list_out_dto.py +111 -0
  140. gemina-0.1.0/src/gemina/generated/models/usage_breakdown_out_dto.py +114 -0
  141. gemina-0.1.0/src/gemina/generated/models/usage_line_item_out_dto.py +118 -0
  142. gemina-0.1.0/src/gemina/generated/models/usage_summary_out_dto.py +130 -0
  143. gemina-0.1.0/src/gemina/generated/models/validation_error.py +100 -0
  144. gemina-0.1.0/src/gemina/generated/models/validation_schema_model.py +89 -0
  145. gemina-0.1.0/src/gemina/generated/models/values_value.py +144 -0
  146. gemina-0.1.0/src/gemina/generated/models/view_meta_out_dto.py +104 -0
  147. gemina-0.1.0/src/gemina/generated/models/volume_discount_tier_out_dto.py +117 -0
  148. gemina-0.1.0/src/gemina/generated/models/web_document_upload_in_dto.py +160 -0
  149. gemina-0.1.0/src/gemina/generated/models/webhook_response_dto.py +88 -0
  150. gemina-0.1.0/src/gemina/generated/py.typed +0 -0
  151. gemina-0.1.0/src/gemina/generated/rest.py +201 -0
  152. gemina-0.1.0/src/gemina/helpers.py +423 -0
  153. gemina-0.1.0/src/gemina/py.typed +0 -0
  154. gemina-0.1.0/src/gemina.egg-info/PKG-INFO +298 -0
  155. gemina-0.1.0/src/gemina.egg-info/SOURCES.txt +157 -0
  156. gemina-0.1.0/src/gemina.egg-info/dependency_links.txt +1 -0
  157. gemina-0.1.0/src/gemina.egg-info/requires.txt +8 -0
  158. gemina-0.1.0/src/gemina.egg-info/top_level.txt +1 -0
  159. gemina-0.1.0/tests/test_helpers.py +594 -0
gemina-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gemina
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gemina-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,298 @@
1
+ Metadata-Version: 2.4
2
+ Name: gemina
3
+ Version: 0.1.0
4
+ Summary: Official Python SDK for the Gemina API - invoice OCR and document intelligence: upload documents, get typed structured data back, then search, aggregate, and chat over everything you've processed.
5
+ Author-email: Gemina <support@gemina.co>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://gemina.co
8
+ Project-URL: Repository, https://github.com/tommyil/gemina-sdk
9
+ Project-URL: Documentation, https://console.gemina.co/docs
10
+ Keywords: gemina,invoice,ocr,document-intelligence,api,sdk
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: httpx<1.0,>=0.28.1
24
+ Requires-Dist: pydantic<3.0,>=2.11
25
+ Requires-Dist: python-dateutil<3.0,>=2.8.2
26
+ Requires-Dist: typing-extensions>=4.7.1
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=8.0; extra == "dev"
29
+ Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # Gemina Python SDK
33
+
34
+ The official Python client for the Gemina API — invoice OCR and document
35
+ intelligence: upload documents, get typed structured data back, then search,
36
+ aggregate, and chat over everything you've processed. Fully async (httpx +
37
+ pydantic v2), with typed models for every request and response.
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ pip install gemina
43
+ ```
44
+
45
+ Requires Python 3.9 or newer.
46
+
47
+ ## Authenticate
48
+
49
+ Get an API key from the [Gemina Console](https://console.gemina.co). The
50
+ client sends it as the `X-API-Key` header on every request — you never handle
51
+ the header yourself:
52
+
53
+ ```python
54
+ from gemina import GeminaClient
55
+
56
+ client = GeminaClient("YOUR_API_KEY")
57
+ ```
58
+
59
+ Never ship the API key in browser or mobile code. For browser embedding, mint
60
+ short-lived session tokens server-side (`POST /v1/sessions/token`) and hand
61
+ those to the frontend — see
62
+ [Session tokens](#session-tokens-browser-embedding) below and the Document
63
+ Intelligence guide at [console.gemina.co/docs](https://console.gemina.co/docs).
64
+
65
+ ## Quickstart — process an invoice in one call
66
+
67
+ `process_document` submits the document through the async endpoints, polls
68
+ with exponential backoff until processing finishes, and returns the final
69
+ typed result — one call, no polling loop to write:
70
+
71
+ ```python
72
+ import asyncio
73
+ from gemina import GeminaClient, ExtractionTypeModel
74
+
75
+ async def main():
76
+ async with GeminaClient("YOUR_API_KEY") as client:
77
+ result = await client.process_document(
78
+ "invoice.png", # path, bytes, or a binary file object
79
+ [ExtractionTypeModel.INVOICE_HEADERS],
80
+ )
81
+ values = result.data.extractions[0].values
82
+ print("Supplier:", values["vendorName"]["value"])
83
+ print("Total: ", values["totalAmount"]["value"], values["currency"]["value"])
84
+ print("Date: ", values["invoiceDate"]["value"])
85
+
86
+ asyncio.run(main())
87
+ ```
88
+
89
+ To process a document that lives at a URL, wrap it in `UrlSource`:
90
+
91
+ ```python
92
+ from gemina import GeminaClient, ExtractionTypeModel, UrlSource
93
+
94
+ async def from_url():
95
+ async with GeminaClient("YOUR_API_KEY") as client:
96
+ result = await client.process_document(
97
+ UrlSource("https://example.com/invoice.pdf"),
98
+ [ExtractionTypeModel.INVOICE_HEADERS],
99
+ )
100
+ print(result.status)
101
+ ```
102
+
103
+ ## What you get back
104
+
105
+ `process_document` returns a `DocumentProcessingResultOutDTO`:
106
+
107
+ - `result.status` — `success | partial | empty | failed` (`failed` raises
108
+ `GeminaProcessingError` instead of returning; `partial` and `empty` are
109
+ returned and still carry usable data and meta).
110
+ - `result.data.extractions` — one entry per requested extraction type; each
111
+ has `.meta.extraction_type`, `.status`, and `.values`.
112
+ - `result.meta.document_id` / `result.meta.correlation_id` — the stored
113
+ document's ID and the async request's correlation ID.
114
+
115
+ `extraction.values` is a dict keyed by field name (camelCase). Each field is
116
+ either `None` (not found) or an object with `value`, `coordinates` (present
117
+ when you pass `include_coordinates=True`), and `confidence`; some fields, such as `taxes`, are lists instead:
118
+
119
+ ```python
120
+ values["vendorName"] # {"value": "Acme Ltd", "coordinates": {...}, "confidence": ...}
121
+ values["totalAmount"]["value"] # 1572.0
122
+ values["invoiceDate"]["value"] # "2020-08-31"
123
+ values["taxes"] # list: [{"type": "vat", "rate": 17.0, "amount": 228.41, ...}]
124
+ ```
125
+
126
+ Extraction types:
127
+
128
+ | Extraction type | What it extracts |
129
+ |---|---|
130
+ | `ocr` | Raw text of the document |
131
+ | `invoice_headers` | Invoice header fields: vendor/buyer, number, dates, amounts, taxes |
132
+ | `invoice_line_items` | Line items table |
133
+ | `document_details_hebrew` | Hebrew document header fields |
134
+ | `document_line_items_hebrew` | Hebrew line items |
135
+ | `custom_template` | Fields defined by your template (pass `template_id=...`) |
136
+ | `filetag` | File classification and naming metadata |
137
+
138
+ ## Search & aggregate your documents
139
+
140
+ Everything you process is indexed for retrieval. Query with natural language
141
+ and/or structured filters — results carry `document_id` citations back to the
142
+ original documents:
143
+
144
+ ```python
145
+ from gemina import GeminaClient, RetrievalQueryInDTO
146
+ from gemina.generated.models.retrieval_filters_dto import RetrievalFiltersDTO
147
+
148
+ async def search():
149
+ async with GeminaClient("YOUR_API_KEY") as client:
150
+ page = await client.retrieval.retrieval_query(RetrievalQueryInDTO(
151
+ mode="hybrid", # structured | semantic | hybrid
152
+ text="cleaning services",
153
+ filters=RetrievalFiltersDTO(total_amount_min=100),
154
+ top_k=5,
155
+ ))
156
+ for item in page.items:
157
+ print(item.vendor_name, item.total_amount, item.currency,
158
+ item.issue_date, item.document_id)
159
+ ```
160
+
161
+ Aggregate across your documents (sum/avg/min/max/count, grouped by up to four
162
+ dimensions — when you aggregate money without fixing a currency, the server
163
+ adds a `currency` grouping so different currencies are never summed together):
164
+
165
+ ```python
166
+ from gemina import GeminaClient, RetrievalAggregateInDTO
167
+ from gemina.generated.models.aggregate_metric_dto import AggregateMetricDTO
168
+
169
+ async def totals_by_vendor():
170
+ async with GeminaClient("YOUR_API_KEY") as client:
171
+ report = await client.retrieval.retrieval_aggregate(RetrievalAggregateInDTO(
172
+ metrics=[
173
+ AggregateMetricDTO(op="sum", field="total_amount"),
174
+ AggregateMetricDTO(op="count"),
175
+ ],
176
+ group_by=["vendor_name"],
177
+ ))
178
+ for row in report.rows:
179
+ print(row.group, row.values["sum_total_amount"].actual_instance,
180
+ row.values["count"].actual_instance)
181
+ ```
182
+
183
+ `client.retrieval.retrieval_status()` tells you how many of your documents
184
+ are currently indexed.
185
+
186
+ ## Chat with your documents
187
+
188
+ Ask questions in natural language; answers come back with a `confident` flag
189
+ and `citations` (document IDs the answer relies on):
190
+
191
+ ```python
192
+ from gemina import GeminaClient, ChatQueryInDTO
193
+
194
+ async def ask():
195
+ async with GeminaClient("YOUR_API_KEY") as client:
196
+ reply = await client.chat.chat_query(ChatQueryInDTO(
197
+ message="What is the total amount of my invoices from last month?",
198
+ ))
199
+ print(reply.answer)
200
+ print("confident:", reply.confident)
201
+ print("citations:", reply.citations)
202
+ ```
203
+
204
+ Chat requires a plan with Document Intelligence enabled — see
205
+ [pricing](https://gemina.co); without it, chat calls fail with HTTP 402 or 403.
206
+
207
+ ## Session tokens (browser embedding)
208
+
209
+ For browser or end-user contexts, mint a short-lived, query-only session
210
+ token server-side and hand *that* to your frontend — never the API key. An
211
+ optional `end_user_id` scopes the token to a single end-user's documents:
212
+
213
+ ```python
214
+ from gemina import GeminaClient, SessionTokenInDTO
215
+
216
+ async def mint_token():
217
+ async with GeminaClient("YOUR_API_KEY") as client: # server-side only
218
+ token = await client.sessions.mint_retrieval_token(SessionTokenInDTO(
219
+ end_user_id="customer-42", # omit for a whole-account session
220
+ ttl_seconds=600, # clamped server-side to [300, 900]
221
+ ))
222
+ return token.token # ship this to the frontend
223
+ ```
224
+
225
+ Token-authenticated clients (for server-side use of a token, or testing) are
226
+ created with `GeminaClient.with_session_token(token)`; tokens can call the
227
+ retrieval query and chat endpoints only. For a drop-in chat UI in the
228
+ browser, see the `@gemina/elements` package on npm.
229
+
230
+ ## Going deeper
231
+
232
+ **Full API surface.** Every generated endpoint group is exposed on the client
233
+ — `client.documents`, `client.retrieval`, `client.chat`, `client.templates`,
234
+ `client.files`, `client.file_tag`, `client.sessions`, `client.subscriptions`,
235
+ `client.billing` — with zero wrapping. For example, listing stored documents:
236
+
237
+ ```python
238
+ async def list_documents():
239
+ async with GeminaClient("YOUR_API_KEY") as client:
240
+ page = await client.documents.find_documents(limit=10)
241
+ for doc in page.data.documents:
242
+ print(doc.meta.document_id, doc.meta.created_at)
243
+ ```
244
+
245
+ **Polling knobs.** `process_document` accepts `timeout_seconds` (default 300),
246
+ `initial_interval_seconds` (default 2.0) and `max_interval_seconds` (default
247
+ 15.0). The wait grows 1.5x per poll, capped at the max, with +/-20% jitter.
248
+ Transient poll failures (connection blips, 5xx) are retried automatically on
249
+ the same schedule; after 3 consecutive failures the error is raised. On
250
+ timeout, `GeminaTimeoutError` carries `.correlation_id` and `.last_result`
251
+ so you can resume polling yourself:
252
+
253
+ ```python
254
+ from gemina import GeminaError, GeminaProcessingError, GeminaTimeoutError
255
+
256
+ async def robust():
257
+ async with GeminaClient("YOUR_API_KEY") as client:
258
+ try:
259
+ result = await client.process_document(
260
+ "invoice.pdf",
261
+ [ExtractionTypeModel.INVOICE_HEADERS],
262
+ timeout_seconds=120,
263
+ )
264
+ except GeminaProcessingError as exc:
265
+ print("processing failed:", exc.result.errors)
266
+ except GeminaTimeoutError as exc:
267
+ print("still running, poll later:", exc.correlation_id)
268
+ result = await client.documents.\
269
+ get_document_processing_result_by_correlation_id(exc.correlation_id)
270
+ ```
271
+
272
+ **Error handling.** Terminal `failed` results raise `GeminaProcessingError`
273
+ (`.result.errors` has the details). Transport and HTTP errors from the
274
+ generated client (e.g. `gemina.generated.exceptions.ApiException` subclasses
275
+ for 4xx/5xx) pass through unwrapped. All hand-written errors subclass
276
+ `GeminaError`.
277
+
278
+ **Custom base URL** (staging / self-hosted):
279
+
280
+ ```python
281
+ client = GeminaClient("YOUR_API_KEY", base_url="https://api.staging.gemina.co")
282
+ ```
283
+
284
+ **Using the SDK from synchronous code.** The client is async-first; from a
285
+ sync program, run calls with `asyncio.run(...)`:
286
+
287
+ ```python
288
+ import asyncio
289
+
290
+ result = asyncio.run(main()) # where main() is an async def using GeminaClient
291
+ ```
292
+
293
+ ## Requirements & support
294
+
295
+ - Python >= 3.9
296
+ - Docs: [console.gemina.co/docs](https://console.gemina.co/docs)
297
+ - Issues: [github.com/tommyil/gemina-sdk/issues](https://github.com/tommyil/gemina-sdk/issues)
298
+ - Email: support@gemina.co
gemina-0.1.0/README.md ADDED
@@ -0,0 +1,267 @@
1
+ # Gemina Python SDK
2
+
3
+ The official Python client for the Gemina API — invoice OCR and document
4
+ intelligence: upload documents, get typed structured data back, then search,
5
+ aggregate, and chat over everything you've processed. Fully async (httpx +
6
+ pydantic v2), with typed models for every request and response.
7
+
8
+ ## Install
9
+
10
+ ```bash
11
+ pip install gemina
12
+ ```
13
+
14
+ Requires Python 3.9 or newer.
15
+
16
+ ## Authenticate
17
+
18
+ Get an API key from the [Gemina Console](https://console.gemina.co). The
19
+ client sends it as the `X-API-Key` header on every request — you never handle
20
+ the header yourself:
21
+
22
+ ```python
23
+ from gemina import GeminaClient
24
+
25
+ client = GeminaClient("YOUR_API_KEY")
26
+ ```
27
+
28
+ Never ship the API key in browser or mobile code. For browser embedding, mint
29
+ short-lived session tokens server-side (`POST /v1/sessions/token`) and hand
30
+ those to the frontend — see
31
+ [Session tokens](#session-tokens-browser-embedding) below and the Document
32
+ Intelligence guide at [console.gemina.co/docs](https://console.gemina.co/docs).
33
+
34
+ ## Quickstart — process an invoice in one call
35
+
36
+ `process_document` submits the document through the async endpoints, polls
37
+ with exponential backoff until processing finishes, and returns the final
38
+ typed result — one call, no polling loop to write:
39
+
40
+ ```python
41
+ import asyncio
42
+ from gemina import GeminaClient, ExtractionTypeModel
43
+
44
+ async def main():
45
+ async with GeminaClient("YOUR_API_KEY") as client:
46
+ result = await client.process_document(
47
+ "invoice.png", # path, bytes, or a binary file object
48
+ [ExtractionTypeModel.INVOICE_HEADERS],
49
+ )
50
+ values = result.data.extractions[0].values
51
+ print("Supplier:", values["vendorName"]["value"])
52
+ print("Total: ", values["totalAmount"]["value"], values["currency"]["value"])
53
+ print("Date: ", values["invoiceDate"]["value"])
54
+
55
+ asyncio.run(main())
56
+ ```
57
+
58
+ To process a document that lives at a URL, wrap it in `UrlSource`:
59
+
60
+ ```python
61
+ from gemina import GeminaClient, ExtractionTypeModel, UrlSource
62
+
63
+ async def from_url():
64
+ async with GeminaClient("YOUR_API_KEY") as client:
65
+ result = await client.process_document(
66
+ UrlSource("https://example.com/invoice.pdf"),
67
+ [ExtractionTypeModel.INVOICE_HEADERS],
68
+ )
69
+ print(result.status)
70
+ ```
71
+
72
+ ## What you get back
73
+
74
+ `process_document` returns a `DocumentProcessingResultOutDTO`:
75
+
76
+ - `result.status` — `success | partial | empty | failed` (`failed` raises
77
+ `GeminaProcessingError` instead of returning; `partial` and `empty` are
78
+ returned and still carry usable data and meta).
79
+ - `result.data.extractions` — one entry per requested extraction type; each
80
+ has `.meta.extraction_type`, `.status`, and `.values`.
81
+ - `result.meta.document_id` / `result.meta.correlation_id` — the stored
82
+ document's ID and the async request's correlation ID.
83
+
84
+ `extraction.values` is a dict keyed by field name (camelCase). Each field is
85
+ either `None` (not found) or an object with `value`, `coordinates` (present
86
+ when you pass `include_coordinates=True`), and `confidence`; some fields, such as `taxes`, are lists instead:
87
+
88
+ ```python
89
+ values["vendorName"] # {"value": "Acme Ltd", "coordinates": {...}, "confidence": ...}
90
+ values["totalAmount"]["value"] # 1572.0
91
+ values["invoiceDate"]["value"] # "2020-08-31"
92
+ values["taxes"] # list: [{"type": "vat", "rate": 17.0, "amount": 228.41, ...}]
93
+ ```
94
+
95
+ Extraction types:
96
+
97
+ | Extraction type | What it extracts |
98
+ |---|---|
99
+ | `ocr` | Raw text of the document |
100
+ | `invoice_headers` | Invoice header fields: vendor/buyer, number, dates, amounts, taxes |
101
+ | `invoice_line_items` | Line items table |
102
+ | `document_details_hebrew` | Hebrew document header fields |
103
+ | `document_line_items_hebrew` | Hebrew line items |
104
+ | `custom_template` | Fields defined by your template (pass `template_id=...`) |
105
+ | `filetag` | File classification and naming metadata |
106
+
107
+ ## Search & aggregate your documents
108
+
109
+ Everything you process is indexed for retrieval. Query with natural language
110
+ and/or structured filters — results carry `document_id` citations back to the
111
+ original documents:
112
+
113
+ ```python
114
+ from gemina import GeminaClient, RetrievalQueryInDTO
115
+ from gemina.generated.models.retrieval_filters_dto import RetrievalFiltersDTO
116
+
117
+ async def search():
118
+ async with GeminaClient("YOUR_API_KEY") as client:
119
+ page = await client.retrieval.retrieval_query(RetrievalQueryInDTO(
120
+ mode="hybrid", # structured | semantic | hybrid
121
+ text="cleaning services",
122
+ filters=RetrievalFiltersDTO(total_amount_min=100),
123
+ top_k=5,
124
+ ))
125
+ for item in page.items:
126
+ print(item.vendor_name, item.total_amount, item.currency,
127
+ item.issue_date, item.document_id)
128
+ ```
129
+
130
+ Aggregate across your documents (sum/avg/min/max/count, grouped by up to four
131
+ dimensions — when you aggregate money without fixing a currency, the server
132
+ adds a `currency` grouping so different currencies are never summed together):
133
+
134
+ ```python
135
+ from gemina import GeminaClient, RetrievalAggregateInDTO
136
+ from gemina.generated.models.aggregate_metric_dto import AggregateMetricDTO
137
+
138
+ async def totals_by_vendor():
139
+ async with GeminaClient("YOUR_API_KEY") as client:
140
+ report = await client.retrieval.retrieval_aggregate(RetrievalAggregateInDTO(
141
+ metrics=[
142
+ AggregateMetricDTO(op="sum", field="total_amount"),
143
+ AggregateMetricDTO(op="count"),
144
+ ],
145
+ group_by=["vendor_name"],
146
+ ))
147
+ for row in report.rows:
148
+ print(row.group, row.values["sum_total_amount"].actual_instance,
149
+ row.values["count"].actual_instance)
150
+ ```
151
+
152
+ `client.retrieval.retrieval_status()` tells you how many of your documents
153
+ are currently indexed.
154
+
155
+ ## Chat with your documents
156
+
157
+ Ask questions in natural language; answers come back with a `confident` flag
158
+ and `citations` (document IDs the answer relies on):
159
+
160
+ ```python
161
+ from gemina import GeminaClient, ChatQueryInDTO
162
+
163
+ async def ask():
164
+ async with GeminaClient("YOUR_API_KEY") as client:
165
+ reply = await client.chat.chat_query(ChatQueryInDTO(
166
+ message="What is the total amount of my invoices from last month?",
167
+ ))
168
+ print(reply.answer)
169
+ print("confident:", reply.confident)
170
+ print("citations:", reply.citations)
171
+ ```
172
+
173
+ Chat requires a plan with Document Intelligence enabled — see
174
+ [pricing](https://gemina.co); without it these calls return 402/403.
175
+
176
+ ## Session tokens (browser embedding)
177
+
178
+ For browser or end-user contexts, mint a short-lived, query-only session
179
+ token server-side and hand *that* to your frontend — never the API key. An
180
+ optional `end_user_id` scopes the token to a single end-user's documents:
181
+
182
+ ```python
183
+ from gemina import GeminaClient, SessionTokenInDTO
184
+
185
+ async def mint_token():
186
+ async with GeminaClient("YOUR_API_KEY") as client: # server-side only
187
+ token = await client.sessions.mint_retrieval_token(SessionTokenInDTO(
188
+ end_user_id="customer-42", # omit for a whole-account session
189
+ ttl_seconds=600, # clamped server-side to [300, 900]
190
+ ))
191
+ return token.token # ship this to the frontend
192
+ ```
193
+
194
+ Token-authenticated clients (for server-side use of a token, or testing) are
195
+ created with `GeminaClient.with_session_token(token)`; tokens can call the
196
+ retrieval query and chat endpoints only. For a drop-in chat UI in the
197
+ browser, see the `@gemina/elements` package on npm.
198
+
199
+ ## Going deeper
200
+
201
+ **Full API surface.** Every generated endpoint group is exposed on the client
202
+ — `client.documents`, `client.retrieval`, `client.chat`, `client.templates`,
203
+ `client.files`, `client.file_tag`, `client.sessions`, `client.subscriptions`,
204
+ `client.billing` — with zero wrapping. For example, listing stored documents:
205
+
206
+ ```python
207
+ async def list_documents():
208
+ async with GeminaClient("YOUR_API_KEY") as client:
209
+ page = await client.documents.find_documents(limit=10)
210
+ for doc in page.data.documents:
211
+ print(doc.meta.document_id, doc.meta.created_at)
212
+ ```
213
+
214
+ **Polling knobs.** `process_document` accepts `timeout_seconds` (default 300),
215
+ `initial_interval_seconds` (default 2.0) and `max_interval_seconds` (default
216
+ 15.0). The wait grows 1.5x per poll, capped at `max_interval_seconds`, with ±20% jitter.
217
+ Transient poll failures (connection blips, 5xx) are retried automatically on
218
+ the same schedule; after 3 consecutive failures the error is raised. On
219
+ timeout, `GeminaTimeoutError` carries `.correlation_id` and `.last_result`
220
+ so you can resume polling yourself:
221
+
222
+ ```python
223
+ from gemina import GeminaError, GeminaProcessingError, GeminaTimeoutError
224
+
225
+ async def robust():
226
+ async with GeminaClient("YOUR_API_KEY") as client:
227
+ try:
228
+ result = await client.process_document(
229
+ "invoice.pdf",
230
+ [ExtractionTypeModel.INVOICE_HEADERS],
231
+ timeout_seconds=120,
232
+ )
233
+ except GeminaProcessingError as exc:
234
+ print("processing failed:", exc.result.errors)
235
+ except GeminaTimeoutError as exc:
236
+ print("still running, poll later:", exc.correlation_id)
237
+ result = await client.documents.\
238
+ get_document_processing_result_by_correlation_id(exc.correlation_id)
239
+ ```
240
+
241
+ **Error handling.** Terminal `failed` results raise `GeminaProcessingError`
242
+ (`.result.errors` has the details). Transport and HTTP errors from the
243
+ generated client (e.g. `gemina.generated.exceptions.ApiException` subclasses
244
+ for 4xx/5xx) pass through unwrapped. All hand-written errors subclass
245
+ `GeminaError`.
246
+
247
+ **Custom base URL** (staging / self-hosted):
248
+
249
+ ```python
250
+ client = GeminaClient("YOUR_API_KEY", base_url="https://api.staging.gemina.co")
251
+ ```
252
+
253
+ **Using the SDK from synchronous code.** The client is async-first; from a
254
+ sync program, run calls with `asyncio.run(...)`:
255
+
256
+ ```python
257
+ import asyncio
258
+
259
+ result = asyncio.run(main()) # where main() is an async def using GeminaClient
260
+ ```
261
+
262
+ ## Requirements & support
263
+
264
+ - Python >= 3.9
265
+ - Docs: [console.gemina.co/docs](https://console.gemina.co/docs)
266
+ - Issues: [github.com/tommyil/gemina-sdk/issues](https://github.com/tommyil/gemina-sdk/issues)
267
+ - Email: support@gemina.co
@@ -0,0 +1,52 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "gemina"
7
+ version = "0.1.0"
8
+ description = "Official Python SDK for the Gemina API - invoice OCR and document intelligence: upload documents, get typed structured data back, then search, aggregate, and chat over everything you've processed."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "Gemina", email = "support@gemina.co" }]
13
+ keywords = ["gemina", "invoice", "ocr", "document-intelligence", "api", "sdk"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Software Development :: Libraries :: Python Modules",
24
+ ]
25
+ dependencies = [
26
+ "httpx>=0.28.1,<1.0",
27
+ "pydantic>=2.11,<3.0",
28
+ "python-dateutil>=2.8.2,<3.0",
29
+ "typing-extensions>=4.7.1",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "pytest>=8.0",
35
+ "pytest-asyncio>=0.24",
36
+ ]
37
+
38
+ [project.urls]
39
+ Homepage = "https://gemina.co"
40
+ Repository = "https://github.com/tommyil/gemina-sdk"
41
+ Documentation = "https://console.gemina.co/docs"
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["src"]
45
+
46
+ [tool.setuptools.package-data]
47
+ gemina = ["py.typed"]
48
+ "gemina.generated" = ["py.typed"]
49
+
50
+ [tool.pytest.ini_options]
51
+ asyncio_mode = "auto"
52
+ testpaths = ["tests"]
gemina-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+