gaik 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ """OpenAI provider implementation."""
2
+
3
+ from typing import Any
4
+
5
+ from langchain_core.language_models import BaseChatModel
6
+ from langchain_openai import ChatOpenAI
7
+
8
+ from .base import LLMProvider, _build_model_kwargs
9
+
10
+
11
class OpenAIProvider(LLMProvider):
    """LLM provider backed by LangChain's ``ChatOpenAI`` client.

    Credentials come from the ``api_key`` argument when one is given;
    otherwise the OPENAI_API_KEY environment variable is expected to
    supply them.
    """

    @property
    def default_model(self) -> str:
        """Name of the model used when the caller does not choose one.

        Returns:
            str: "gpt-4.1"
        """
        return "gpt-4.1"

    def create_chat_model(
        self,
        model: str | None = None,
        api_key: str | None = None,
        **kwargs: Any,
    ) -> BaseChatModel:
        """Build a ``ChatOpenAI`` instance for the requested model.

        Args:
            model: Model name, e.g. "gpt-4.1" or "gpt-4o". Any falsy value
                (``None`` or an empty string) selects ``default_model``.
            api_key: OpenAI API key. When ``None``, the OPENAI_API_KEY
                environment variable is expected to provide it.
            **kwargs: Extra options (e.g. temperature, max_tokens) forwarded
                to ``_build_model_kwargs`` together with ``model`` and
                ``api_key``; the resulting mapping is expanded into the
                ``ChatOpenAI`` constructor.

        Returns:
            ChatOpenAI: The configured chat model.

        Example:
            >>> provider = OpenAIProvider()
            >>> model = provider.create_chat_model(model="gpt-4o", temperature=0.7)
        """
        # Truthiness check (not `is None`) so an empty string also falls
        # back to the provider default.
        chosen_model = model if model else self.default_model
        chat_kwargs = _build_model_kwargs(
            model=chosen_model,
            api_key=api_key,
            **kwargs,
        )
        return ChatOpenAI(**chat_kwargs)
@@ -0,0 +1,278 @@
1
+ Metadata-Version: 2.4
2
+ Name: gaik
3
+ Version: 0.2.7
4
+ Summary: General AI Kit - Reusable AI/ML components for Python
5
+ Author: GAIK Project
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://gaik.ai/
29
+ Project-URL: Repository, https://github.com/GAIK-project/toolkit-shared-components
30
+ Project-URL: Documentation, https://github.com/GAIK-project/toolkit-shared-components/tree/main/gaik-py
31
+ Project-URL: Issues, https://github.com/GAIK-project/toolkit-shared-components/issues
32
+ Keywords: ai,ml,langchain,openai,anthropic,google,structured-outputs,pydantic,schema,extraction
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: pydantic>=2.12.3
46
+ Requires-Dist: langchain-core>=1.0.3
47
+ Requires-Dist: langchain-openai>=1.0.2
48
+ Requires-Dist: langchain-anthropic>=1.0.1
49
+ Requires-Dist: langchain-google-genai>=3.0.1
50
+ Provides-Extra: dev
51
+ Requires-Dist: ruff>=0.14.1; extra == "dev"
52
+ Requires-Dist: build>=1.0; extra == "dev"
53
+ Requires-Dist: twine>=4.0; extra == "dev"
54
+ Provides-Extra: vision
55
+ Requires-Dist: openai>=1.40.0; extra == "vision"
56
+ Requires-Dist: pdf2image>=1.17.0; extra == "vision"
57
+ Requires-Dist: pillow>=10.0.0; extra == "vision"
58
+ Requires-Dist: python-dotenv>=1.0.0; extra == "vision"
59
+ Dynamic: license-file
60
+
61
+ # GAIK - General AI Kit
62
+
63
+ **Reusable AI/ML components for Python**
64
+
65
+ Multi-provider AI toolkit for structured data extraction. Supports OpenAI, Anthropic Claude, Google Gemini, and Azure OpenAI.
66
+
67
+ ## Features
68
+
69
+ ### 🔍 Dynamic Data Extraction (`gaik.extract`)
70
+
71
+ Extract structured data from unstructured text using LangChain's structured outputs:
72
+
73
+ - ✅ **Multi-provider** - OpenAI, Anthropic, Azure, Google - easy switching
74
+ - ✅ **Guaranteed structure** - API-enforced schema compliance
75
+ - ✅ **Type-safe** - Full Pydantic validation
76
+ - ✅ **No code generation** - Uses Pydantic's `create_model()`, no `eval()`
77
+ - ✅ **Cost-effective** - Minimal API calls
78
+ - ✅ **Simple & clean** - Easy to understand, minimal dependencies
79
+
80
+ ### 🖼️ Vision PDF Parsing (`gaik.parsers`)
81
+
82
+ Convert PDF pages to Markdown with OpenAI or Azure OpenAI vision models:
83
+
84
+ - ✅ **Single API surface** - Works with standard OpenAI or Azure deployments
85
+ - ✅ **Optional extras** - Install with `pip install gaik[vision]`
86
+ - ✅ **CLI ready** - See `examples/demo_vision_parser.py` for quick conversions
87
+ - ✅ **Table-aware** - Keeps multi-page tables intact with optional cleanup
88
+
89
+ ## Installation
90
+
91
+ ```bash
92
+ # From PyPI (production)
93
+ pip install gaik
94
+
95
+ # With optional vision parser support
96
+ pip install "gaik[vision]"  # quoted so shells like zsh do not glob the brackets
97
+ ```
98
+
99
+ ## Quick Start
100
+
101
+ ### 1. Set up your provider API key
102
+
103
+ **OpenAI (default):**
104
+
105
+ ```bash
106
+ export OPENAI_API_KEY='sk-...' # Get from: https://platform.openai.com/api-keys
107
+ ```
108
+
109
+ **Anthropic:**
110
+
111
+ ```bash
112
+ export ANTHROPIC_API_KEY='sk-ant-...' # Get from: https://console.anthropic.com
113
+ ```
114
+
115
+ **Google:**
116
+
117
+ ```bash
118
+ export GOOGLE_API_KEY='...' # Get from: https://ai.google.dev
119
+ ```
120
+
121
+ **Azure OpenAI:**
122
+
123
+ ```bash
124
+ export AZURE_API_KEY='...'
125
+ export AZURE_ENDPOINT='https://your-resource.openai.azure.com/'
126
+ ```
127
+
128
+ ### 2. Simple Extraction
129
+
130
+ ```python
131
+ from gaik.extract import SchemaExtractor
132
+
133
+ # Using default OpenAI provider
134
+ extractor = SchemaExtractor("Extract name and age from text")
135
+ result = extractor.extract_one("Alice is 25 years old")
136
+ print(result)
137
+ # {'name': 'Alice', 'age': 25}
138
+
139
+ # Switch to Anthropic Claude
140
+ extractor = SchemaExtractor(
141
+ "Extract name and age from text",
142
+ provider="anthropic"
143
+ )
144
+
145
+ # Use Google Gemini
146
+ extractor = SchemaExtractor(
147
+ "Extract name and age from text",
148
+ provider="google"
149
+ )
150
+ ```
151
+
152
+ ### 3. Batch Extraction
153
+
154
+ ```python
155
+ from gaik.extract import dynamic_extraction_workflow
156
+
157
+ description = """
158
+ Extract from invoices:
159
+ - Invoice number
160
+ - Total amount in USD
161
+ - Vendor name
162
+ """
163
+
164
+ documents = [
165
+ "Invoice #12345 from Acme Corp. Total: $1,500",
166
+ "INV-67890, Supplier: TechCo, Amount: $2,750"
167
+ ]
168
+
169
+ # Use any provider
170
+ results = dynamic_extraction_workflow(
171
+ description,
172
+ documents,
173
+ provider="openai" # or "anthropic", "google", "azure"
174
+ )
175
+
176
+ for result in results:
177
+ print(f"Invoice: {result['invoice_number']}, Amount: ${result['total_amount']}")
178
+ ```
179
+
180
+ ### 4. Reusable Extractor (Recommended)
181
+
182
+ ```python
183
+ from gaik.extract import SchemaExtractor
184
+
185
+ # Create extractor once
186
+ extractor = SchemaExtractor("""
187
+ Extract from project reports:
188
+ - Project title
189
+ - Lead institution
190
+ - Total funding in euros
191
+ - List of partner countries
192
+ """)
193
+
194
+ # Reuse for multiple batches
195
+ batch1_results = extractor.extract(documents_batch1)
196
+ batch2_results = extractor.extract(documents_batch2)
197
+
198
+ # Inspect the schema
199
+ print(f"Fields: {extractor.field_names}")
200
+ # ['project_title', 'lead_institution', 'total_funding', 'partner_countries']
201
+ ```
202
+
203
+ ### 5. Schema-Only Generation
204
+
205
+ Generate Pydantic schemas without extraction:
206
+
207
+ ```python
208
+ from gaik.extract import FieldSpec, ExtractionRequirements, create_extraction_model
209
+
210
+ requirements = ExtractionRequirements(
211
+ use_case_name="Invoice",
212
+ fields=[
213
+ FieldSpec(
214
+ field_name="invoice_number",
215
+ field_type="str",
216
+ description="Invoice identifier",
217
+ required=True
218
+ ),
219
+ FieldSpec(
220
+ field_name="amount",
221
+ field_type="float",
222
+ description="Total amount",
223
+ required=True
224
+ )
225
+ ]
226
+ )
227
+
228
+ # Create Pydantic model
229
+ InvoiceModel = create_extraction_model(requirements)
230
+ schema = InvoiceModel.model_json_schema()
231
+ ```
232
+
233
+ ## API Reference
234
+
235
+ | Function/Class | Purpose |
236
+ | ------------------------------- | ------------------------------------------------- |
237
+ | `SchemaExtractor` | Reusable extractor with provider selection |
238
+ | `dynamic_extraction_workflow()` | One-shot extraction from natural language |
239
+ | `create_extraction_model()` | Generate Pydantic model from field specifications |
240
+ | `FieldSpec` | Define a single extraction field |
241
+ | `ExtractionRequirements` | Collection of field specifications |
242
+
243
+ ### Provider Parameters
244
+
245
+ ```python
246
+ SchemaExtractor(
247
+ user_description: str | None = None, # Optional if requirements provided
248
+ provider: Literal["openai", "anthropic", "google", "azure"] = "openai",
249
+ model: str | None = None, # Optional: override default model
250
+ api_key: str | None = None, # Optional: override env variable
251
+ client: BaseChatModel | None = None, # Optional: custom LangChain client
252
+ requirements: ExtractionRequirements | None = None # Optional: pre-defined schema
253
+ )
254
+ ```
255
+
256
+ **Note:**
257
+
258
+ - IDEs with type checking (VS Code, PyCharm) will show autocomplete for the `provider` parameter
259
+ - Either `user_description` or `requirements` must be provided
260
+ - Using `requirements` skips the LLM parsing step (faster & cheaper)
261
+
262
+ ## Default Models
263
+
264
+ - OpenAI: `gpt-4.1`
265
+ - Anthropic: `claude-sonnet-4-5-20250929`
266
+ - Google: `gemini-2.5-flash`
267
+ - Azure: `gpt-4.1`
268
+
269
+ ## Resources
270
+
271
+ - [GitHub Repository](https://github.com/GAIK-project/toolkit-shared-components)
272
+ - [Examples Directory](https://github.com/GAIK-project/toolkit-shared-components/tree/main/examples)
273
+ - [LangChain Documentation](https://python.langchain.com/docs/how_to/structured_output/)
274
+ - [Pydantic Documentation](https://docs.pydantic.dev/)
275
+
276
+ ## License
277
+
278
+ MIT License - see [LICENSE](LICENSE) file for details.
@@ -0,0 +1,18 @@
1
+ gaik/__init__.py,sha256=32Rg3auhijpgYZCk49LjlGClZkE1Sr5Y7bN0MDdo80Y,700
2
+ gaik/extract/__init__.py,sha256=CukSVkXLn9BS7kjAYIXNZAvZjzXaPCEwGUAhH4V70oE,837
3
+ gaik/extract/extractor.py,sha256=0aoSwi6weRIMigRpXbsMT30XVTpf9MuuOq8-A-21qtI,10836
4
+ gaik/extract/models.py,sha256=cXxivZ72c6rm4DvS0q5zJpegeaL3q2CDJRsttGBzn_Q,1486
5
+ gaik/extract/utils.py,sha256=vHwMQL_QcTybRRwrzECmLcMEei0qBzj69zXexdiermc,3737
6
+ gaik/parsers/__init__.py,sha256=3pk3nDdKw9zO-YuPPKS3CmkBJ4EkP0RefbCtC9-778k,297
7
+ gaik/parsers/vision.py,sha256=Ul1Lhdrvj8BfOXo3Qig0JKcKYMhcZOeWQY62Xu6SM08,13630
8
+ gaik/providers/__init__.py,sha256=pb_ZANsRWZ1AQc5rwhmzpncp0ddlX5UFU0l-_coZho4,1764
9
+ gaik/providers/anthropic.py,sha256=jvPafiAKlXTDa3tag-ohgvSACTy7Tvn4ZH3ZKcCiZSU,1791
10
+ gaik/providers/azure.py,sha256=lRCn04v57BM0vDHNtb9Uc3lzumrL4p3J0VuWB8RS5lY,2161
11
+ gaik/providers/base.py,sha256=upH3S0QbKQSEC99mOF48LtKFcK5g3TFyd0GVxVuvgUk,1885
12
+ gaik/providers/google.py,sha256=t5p4uBb3v81KgZ-JQ7N7oKIS-0doCem1IV4VThfLKIY,1690
13
+ gaik/providers/openai.py,sha256=OUyc_a01oNEqtMO8KnHZmwqJp_hEQQC1FXfXelNNGPU,1559
14
+ gaik-0.2.7.dist-info/licenses/LICENSE,sha256=jE12SAuBnlcHf16GXD5xfZNtSH6ds30U5LniIt_gRis,1087
15
+ gaik-0.2.7.dist-info/METADATA,sha256=6lYKHurKCQK8FVemsdxfKhZ8OTRjpjb9_c70M_UT9AU,9048
16
+ gaik-0.2.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ gaik-0.2.7.dist-info/top_level.txt,sha256=p-eYZovskBEyr7C7GfsXIYQta9-fGyUSaFYDTX8oky8,5
18
+ gaik-0.2.7.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ gaik