gaik 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gaik-0.2.7.dist-info → gaik-0.2.9.dist-info}/METADATA +68 -40
- {gaik-0.2.7.dist-info → gaik-0.2.9.dist-info}/RECORD +5 -5
- {gaik-0.2.7.dist-info → gaik-0.2.9.dist-info}/WHEEL +0 -0
- {gaik-0.2.7.dist-info → gaik-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {gaik-0.2.7.dist-info → gaik-0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gaik
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.9
|
|
4
4
|
Summary: General AI Kit - Reusable AI/ML components for Python
|
|
5
5
|
Author: GAIK Project
|
|
6
6
|
License: MIT License
|
|
@@ -98,58 +98,65 @@ pip install gaik[vision]
|
|
|
98
98
|
|
|
99
99
|
## Quick Start
|
|
100
100
|
|
|
101
|
-
###
|
|
101
|
+
### Extract Data from Text
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
Set your API key (choose one):
|
|
104
104
|
|
|
105
105
|
```bash
|
|
106
|
-
export OPENAI_API_KEY='sk-...'
|
|
106
|
+
export OPENAI_API_KEY='sk-...' # OpenAI (default)
|
|
107
|
+
export ANTHROPIC_API_KEY='sk-ant-...' # Anthropic
|
|
108
|
+
export GOOGLE_API_KEY='...' # Google
|
|
109
|
+
export AZURE_API_KEY='...' # Azure
|
|
110
|
+
export AZURE_ENDPOINT='https://...' # Azure (also required)
|
|
107
111
|
```
|
|
108
112
|
|
|
109
|
-
|
|
113
|
+
Then extract:
|
|
110
114
|
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
```
|
|
115
|
+
```python
|
|
116
|
+
from gaik.extract import SchemaExtractor
|
|
114
117
|
|
|
115
|
-
|
|
118
|
+
# Using default OpenAI provider
|
|
119
|
+
extractor = SchemaExtractor("Extract name and age from text")
|
|
120
|
+
result = extractor.extract_one("Alice is 25 years old")
|
|
121
|
+
print(result)
|
|
122
|
+
# {'name': 'Alice', 'age': 25}
|
|
116
123
|
|
|
117
|
-
|
|
118
|
-
|
|
124
|
+
# Switch provider
|
|
125
|
+
extractor = SchemaExtractor("Extract name and age", provider="anthropic") # or "google", "azure"
|
|
119
126
|
```
|
|
120
127
|
|
|
121
|
-
|
|
128
|
+
### Convert PDF to Markdown
|
|
122
129
|
|
|
130
|
+
Requires: `pip install gaik[vision]`
|
|
131
|
+
|
|
132
|
+
Set environment variables first:
|
|
123
133
|
```bash
|
|
134
|
+
# For Azure OpenAI
|
|
124
135
|
export AZURE_API_KEY='...'
|
|
125
136
|
export AZURE_ENDPOINT='https://your-resource.openai.azure.com/'
|
|
126
|
-
|
|
137
|
+
export AZURE_DEPLOYMENT='gpt-4o' # Your deployment name
|
|
127
138
|
|
|
128
|
-
|
|
139
|
+
# OR for OpenAI
|
|
140
|
+
export OPENAI_API_KEY='sk-...'
|
|
141
|
+
```
|
|
129
142
|
|
|
143
|
+
Then convert:
|
|
130
144
|
```python
|
|
131
|
-
from gaik.
|
|
145
|
+
from gaik.parsers import VisionParser, get_openai_config
|
|
132
146
|
|
|
133
|
-
#
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
print(result)
|
|
137
|
-
# {'name': 'Alice', 'age': 25}
|
|
147
|
+
# Configure
|
|
148
|
+
config = get_openai_config(use_azure=True) # or use_azure=False for OpenAI
|
|
149
|
+
parser = VisionParser(config)
|
|
138
150
|
|
|
139
|
-
#
|
|
140
|
-
|
|
141
|
-
"Extract name and age from text",
|
|
142
|
-
provider="anthropic"
|
|
143
|
-
)
|
|
151
|
+
# Convert PDF (returns list of pages as Markdown strings)
|
|
152
|
+
pages = parser.convert_pdf("invoice.pdf", clean_output=True)
|
|
144
153
|
|
|
145
|
-
#
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
provider="google"
|
|
149
|
-
)
|
|
154
|
+
# Combine into single document
|
|
155
|
+
markdown = "\n\n".join(pages)
|
|
156
|
+
print(markdown)
|
|
150
157
|
```
|
|
151
158
|
|
|
152
|
-
###
|
|
159
|
+
### Batch Extraction
|
|
153
160
|
|
|
154
161
|
```python
|
|
155
162
|
from gaik.extract import dynamic_extraction_workflow
|
|
@@ -232,6 +239,8 @@ schema = InvoiceModel.model_json_schema()
|
|
|
232
239
|
|
|
233
240
|
## API Reference
|
|
234
241
|
|
|
242
|
+
### Extraction API
|
|
243
|
+
|
|
235
244
|
| Function/Class | Purpose |
|
|
236
245
|
| ------------------------------- | ------------------------------------------------- |
|
|
237
246
|
| `SchemaExtractor` | Reusable extractor with provider selection |
|
|
@@ -240,7 +249,15 @@ schema = InvoiceModel.model_json_schema()
|
|
|
240
249
|
| `FieldSpec` | Define a single extraction field |
|
|
241
250
|
| `ExtractionRequirements` | Collection of field specifications |
|
|
242
251
|
|
|
243
|
-
###
|
|
252
|
+
### Vision Parser API
|
|
253
|
+
|
|
254
|
+
| Function/Class | Purpose |
|
|
255
|
+
| --------------------- | ------------------------------------------ |
|
|
256
|
+
| `VisionParser` | PDF to Markdown converter using vision LLM |
|
|
257
|
+
| `get_openai_config()` | Helper to configure OpenAI/Azure API |
|
|
258
|
+
| `OpenAIConfig` | Configuration dataclass for vision parser |
|
|
259
|
+
|
|
260
|
+
### Extraction Parameters
|
|
244
261
|
|
|
245
262
|
```python
|
|
246
263
|
SchemaExtractor(
|
|
@@ -253,18 +270,29 @@ SchemaExtractor(
|
|
|
253
270
|
)
|
|
254
271
|
```
|
|
255
272
|
|
|
256
|
-
|
|
273
|
+
### Vision Parser Parameters
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
VisionParser(config: OpenAIConfig)
|
|
277
|
+
|
|
278
|
+
get_openai_config(
|
|
279
|
+
use_azure: bool = True, # True for Azure, False for OpenAI
|
|
280
|
+
) -> OpenAIConfig
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
**Environment variables (auto-detected):**
|
|
257
284
|
|
|
258
|
-
-
|
|
259
|
-
-
|
|
260
|
-
- Using `requirements` skips LLM parsing step (faster & cheaper)
|
|
285
|
+
- OpenAI: `OPENAI_API_KEY`
|
|
286
|
+
- Azure: `AZURE_API_KEY` + `AZURE_ENDPOINT` + `AZURE_DEPLOYMENT` (optional: `AZURE_API_VERSION`)
|
|
261
287
|
|
|
262
288
|
## Default Models
|
|
263
289
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
290
|
+
| Provider | Default Model |
|
|
291
|
+
| --------- | ---------------------------- |
|
|
292
|
+
| OpenAI | `gpt-4.1` |
|
|
293
|
+
| Anthropic | `claude-sonnet-4-5-20250929` |
|
|
294
|
+
| Google | `gemini-2.5-flash` |
|
|
295
|
+
| Azure | `gpt-4.1` (or your deployment) |
|
|
268
296
|
|
|
269
297
|
## Resources
|
|
270
298
|
|
|
@@ -11,8 +11,8 @@ gaik/providers/azure.py,sha256=lRCn04v57BM0vDHNtb9Uc3lzumrL4p3J0VuWB8RS5lY,2161
|
|
|
11
11
|
gaik/providers/base.py,sha256=upH3S0QbKQSEC99mOF48LtKFcK5g3TFyd0GVxVuvgUk,1885
|
|
12
12
|
gaik/providers/google.py,sha256=t5p4uBb3v81KgZ-JQ7N7oKIS-0doCem1IV4VThfLKIY,1690
|
|
13
13
|
gaik/providers/openai.py,sha256=OUyc_a01oNEqtMO8KnHZmwqJp_hEQQC1FXfXelNNGPU,1559
|
|
14
|
-
gaik-0.2.
|
|
15
|
-
gaik-0.2.
|
|
16
|
-
gaik-0.2.
|
|
17
|
-
gaik-0.2.
|
|
18
|
-
gaik-0.2.
|
|
14
|
+
gaik-0.2.9.dist-info/licenses/LICENSE,sha256=jE12SAuBnlcHf16GXD5xfZNtSH6ds30U5LniIt_gRis,1087
|
|
15
|
+
gaik-0.2.9.dist-info/METADATA,sha256=Xz95byGbtzyZ5BsYalpFMJysGHqpA3cYuLuhhRBQdYY,10216
|
|
16
|
+
gaik-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
gaik-0.2.9.dist-info/top_level.txt,sha256=p-eYZovskBEyr7C7GfsXIYQta9-fGyUSaFYDTX8oky8,5
|
|
18
|
+
gaik-0.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|