gaik 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaik-0.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gaik-0.0.0/PKG-INFO ADDED
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: gaik
3
+ Version: 0.0.0
4
+ Summary: General AI Kit - Reusable AI/ML components for Python
5
+ Author: GAIK Project
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://gaik.ai/
29
+ Project-URL: Repository, https://github.com/GAIK-project/gaik-toolkit
30
+ Project-URL: Documentation, https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/python/gaik
31
+ Project-URL: Issues, https://github.com/GAIK-project/gaik-toolkit/issues
32
+ Keywords: ai,ml,langchain,openai,anthropic,google,structured-outputs,pydantic,schema,extraction
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: pydantic>=2.12.4
46
+ Provides-Extra: extract
47
+ Requires-Dist: langchain-core>=1.0.3; extra == "extract"
48
+ Requires-Dist: langchain-openai>=1.0.2; extra == "extract"
49
+ Requires-Dist: langchain-anthropic>=1.0.1; extra == "extract"
50
+ Requires-Dist: langchain-google-genai>=3.0.1; extra == "extract"
51
+ Provides-Extra: parser
52
+ Requires-Dist: openai>=2.7; extra == "parser"
53
+ Requires-Dist: PyMuPDF>=1.23.0; extra == "parser"
54
+ Requires-Dist: python-dotenv>=1.0.0; extra == "parser"
55
+ Provides-Extra: all
56
+ Requires-Dist: gaik[extract]; extra == "all"
57
+ Requires-Dist: gaik[parser]; extra == "all"
58
+ Provides-Extra: dev
59
+ Requires-Dist: ruff>=0.14.1; extra == "dev"
60
+ Requires-Dist: build>=1.0; extra == "dev"
61
+ Requires-Dist: twine>=4.0; extra == "dev"
62
+ Requires-Dist: pytest>=8.0; extra == "dev"
63
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
64
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
65
+ Requires-Dist: tomli>=2.0.1; extra == "dev"
66
+ Provides-Extra: ci
67
+ Requires-Dist: langchain-core>=1.0.3; extra == "ci"
68
+ Requires-Dist: langchain-openai>=1.0.2; extra == "ci"
69
+ Requires-Dist: langchain-anthropic>=1.0.1; extra == "ci"
70
+ Requires-Dist: langchain-google-genai>=3.0.1; extra == "ci"
71
+ Requires-Dist: ruff>=0.14.1; extra == "ci"
72
+ Requires-Dist: build>=1.0; extra == "ci"
73
+ Requires-Dist: twine>=4.0; extra == "ci"
74
+ Requires-Dist: pytest>=8.0; extra == "ci"
75
+ Requires-Dist: pytest-cov>=4.1; extra == "ci"
76
+ Requires-Dist: pytest-mock>=3.12; extra == "ci"
77
+ Requires-Dist: tomli>=2.0.1; extra == "ci"
78
+ Dynamic: license-file
79
+
80
+ # GAIK - General AI Kit
81
+
82
+ Multi-provider AI toolkit for Python with structured data extraction and document parsing.
83
+
84
+ ## Installation
85
+
86
+ ```bash
87
+ # Extraction features (OpenAI, Anthropic, Google, Azure)
88
+ pip install "gaik[extract]"
89
+
90
+ # PDF parsing
91
+ pip install "gaik[parser]"
92
+
93
+ # All features
94
+ pip install "gaik[all]"
95
+ ```
96
+
97
+ ## Quick Start
98
+
99
+ ### Extract Data
100
+
101
+ ```python
102
+ from gaik.extract import SchemaExtractor
103
+
104
+ # Set API key first: export OPENAI_API_KEY='sk-...'
105
+ extractor = SchemaExtractor("Extract name and age from text")
106
+ result = extractor.extract_one("Alice is 25 years old")
107
+ print(result) # {'name': 'Alice', 'age': 25}
108
+
109
+ # Switch provider
110
+ extractor = SchemaExtractor("Extract name and age", provider="anthropic") # or "google", "azure"
111
+ ```
112
+
113
+ ### Parse PDF to Markdown
114
+
115
+ ```python
116
+ from gaik.parsers import VisionParser, get_openai_config
117
+
118
+ # Set environment: AZURE_API_KEY, AZURE_ENDPOINT, AZURE_DEPLOYMENT
119
+ config = get_openai_config(use_azure=True)
120
+ parser = VisionParser(config)
121
+
122
+ pages = parser.convert_pdf("invoice.pdf", clean_output=True)
123
+ markdown = "\n\n".join(pages)
124
+ ```
125
+
126
+ ### Fast Local PDF Parsing
127
+
128
+ ```python
129
+ from gaik.parsers import PyMuPDFParser
130
+
131
+ parser = PyMuPDFParser()
132
+ result = parser.parse_document("document.pdf")
133
+ print(result["text_content"])
134
+ ```
135
+
136
+ ## Features
137
+
138
+ ### 🔍 Structured Data Extraction
139
+
140
+ - **Multi-provider** - OpenAI, Anthropic, Google, Azure
141
+ - **Type-safe** - Full Pydantic validation
142
+ - **API-enforced** - Guaranteed schema compliance
143
+ - **Simple** - Natural language to structured data
144
+
145
+ ### 📄 Document Parsing
146
+
147
+ - **VisionParser** - PDF to Markdown using vision models
148
+ - **PyMuPDFParser** - Fast local text extraction
149
+ - **No external binaries** - Pure Python dependencies
150
+
151
+ ## API Reference
152
+
153
+ ### Extraction
154
+
155
+ ```python
156
+ SchemaExtractor(
157
+ user_description: str,
158
+ provider: Literal["openai", "anthropic", "google", "azure"] = "openai",
159
+ model: str | None = None,
160
+ api_key: str | None = None,
161
+ )
162
+ ```
163
+
164
+ **Methods and attributes:**
165
+ - `extract_one(text: str) -> dict` - Extract from single text
166
+ - `extract(texts: list[str]) -> list[dict]` - Batch extraction
167
+ - `field_names` - List of field names
168
+ - `model` - Generated Pydantic model
169
+
170
+ ### Vision Parser
171
+
172
+ ```python
173
+ VisionParser(
174
+ config: OpenAIConfig,
175
+ custom_prompt: str | None = None,
176
+ use_context: bool = True,
177
+ max_tokens: int = 16_000,
178
+ )
179
+ ```
180
+
181
+ **Methods:**
182
+ - `convert_pdf(pdf_path: str, dpi: int = 200, clean_output: bool = True) -> list[str]`
183
+ - `save_markdown(pages: list[str], output_path: str)`
184
+
185
+ **Config Helper:**
186
+ ```python
187
+ get_openai_config(use_azure: bool = True) -> OpenAIConfig
188
+ ```
189
+
190
+ ### PyMuPDF Parser
191
+
192
+ ```python
193
+ PyMuPDFParser()
194
+ ```
195
+
196
+ **Methods:**
197
+ - `parse_document(file_path: str) -> dict` - Extract text and metadata
198
+
199
+ ## Environment Variables
200
+
201
+ | Provider | Variables |
202
+ |----------|-----------|
203
+ | OpenAI | `OPENAI_API_KEY` |
204
+ | Anthropic | `ANTHROPIC_API_KEY` |
205
+ | Google | `GOOGLE_API_KEY` |
206
+ | Azure | `AZURE_API_KEY`, `AZURE_ENDPOINT`, `AZURE_DEPLOYMENT` |
207
+
208
+ ## Default Models
209
+
210
+ | Provider | Model |
211
+ |----------|-------|
212
+ | OpenAI | `gpt-4.1` |
213
+ | Anthropic | `claude-sonnet-4-5-20250929` |
214
+ | Google | `gemini-2.5-flash` |
215
+ | Azure | User's deployment |
216
+
217
+ ## Batch Processing
218
+
219
+ ```python
220
+ extractor = SchemaExtractor("""
221
+ Extract:
222
+ - invoice_number: Invoice ID
223
+ - amount: Total in USD
224
+ - vendor: Company name
225
+ """)
226
+
227
+ documents = [
228
+ "Invoice #12345 from Acme Corp. Total: $1,500",
229
+ "INV-67890, Supplier: TechCo, Amount: $2,750"
230
+ ]
231
+
232
+ results = extractor.extract(documents)
233
+ for result in results:
234
+ print(f"Invoice: {result['invoice_number']}, Amount: ${result['amount']}")
235
+ ```
236
+
237
+ ## Schema Inspection
238
+
239
+ ```python
240
+ extractor = SchemaExtractor("Extract name and age")
241
+
242
+ # Field names
243
+ print(extractor.field_names) # ['name', 'age']
244
+
245
+ # JSON schema
246
+ schema = extractor.model.model_json_schema()
247
+
248
+ # Field specs
249
+ for field in extractor.fields:
250
+ print(f"{field.field_name}: {field.field_type}")
251
+ ```
252
+
253
+ ## Advanced Usage
254
+
255
+ ### Custom Prompt for Vision Parser
256
+
257
+ ```python
258
+ custom_prompt = """
259
+ Convert document to markdown:
260
+ - Preserve all tables
261
+ - Include headers and footers
262
+ - Maintain layout structure
263
+ """
264
+
265
+ parser = VisionParser(config, custom_prompt=custom_prompt)
266
+ ```
267
+
268
+ ### Pre-defined Schema
269
+
270
+ ```python
271
+ from gaik.extract import ExtractionRequirements, FieldSpec, SchemaExtractor, create_extraction_model
272
+
273
+ requirements = ExtractionRequirements(
274
+ use_case_name="Invoice",
275
+ fields=[
276
+ FieldSpec("invoice_number", "str", "Invoice ID", required=True),
277
+ FieldSpec("amount", "float", "Total amount", required=True),
278
+ ]
279
+ )
280
+
281
+ InvoiceModel = create_extraction_model(requirements)
282
+ extractor = SchemaExtractor(requirements=requirements)
283
+ ```
284
+
285
+ ## Resources
286
+
287
+ - **Examples**: [examples/](https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/examples)
288
+ - **Repository**: [github.com/GAIK-project/gaik-toolkit](https://github.com/GAIK-project/gaik-toolkit)
289
+ - **Contributing**: [CONTRIBUTING.md](https://github.com/GAIK-project/gaik-toolkit/blob/main/CONTRIBUTING.md)
290
+
291
+ ## License
292
+
293
+ MIT - see [LICENSE](https://github.com/GAIK-project/gaik-toolkit/blob/main/LICENSE)
gaik-0.0.0/README.md ADDED
@@ -0,0 +1,214 @@
1
+ # GAIK - General AI Kit
2
+
3
+ Multi-provider AI toolkit for Python with structured data extraction and document parsing.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # Extraction features (OpenAI, Anthropic, Google, Azure)
9
+ pip install "gaik[extract]"
10
+
11
+ # PDF parsing
12
+ pip install "gaik[parser]"
13
+
14
+ # All features
15
+ pip install "gaik[all]"
16
+ ```
17
+
18
+ ## Quick Start
19
+
20
+ ### Extract Data
21
+
22
+ ```python
23
+ from gaik.extract import SchemaExtractor
24
+
25
+ # Set API key first: export OPENAI_API_KEY='sk-...'
26
+ extractor = SchemaExtractor("Extract name and age from text")
27
+ result = extractor.extract_one("Alice is 25 years old")
28
+ print(result) # {'name': 'Alice', 'age': 25}
29
+
30
+ # Switch provider
31
+ extractor = SchemaExtractor("Extract name and age", provider="anthropic") # or "google", "azure"
32
+ ```
33
+
34
+ ### Parse PDF to Markdown
35
+
36
+ ```python
37
+ from gaik.parsers import VisionParser, get_openai_config
38
+
39
+ # Set environment: AZURE_API_KEY, AZURE_ENDPOINT, AZURE_DEPLOYMENT
40
+ config = get_openai_config(use_azure=True)
41
+ parser = VisionParser(config)
42
+
43
+ pages = parser.convert_pdf("invoice.pdf", clean_output=True)
44
+ markdown = "\n\n".join(pages)
45
+ ```
46
+
47
+ ### Fast Local PDF Parsing
48
+
49
+ ```python
50
+ from gaik.parsers import PyMuPDFParser
51
+
52
+ parser = PyMuPDFParser()
53
+ result = parser.parse_document("document.pdf")
54
+ print(result["text_content"])
55
+ ```
56
+
57
+ ## Features
58
+
59
+ ### 🔍 Structured Data Extraction
60
+
61
+ - **Multi-provider** - OpenAI, Anthropic, Google, Azure
62
+ - **Type-safe** - Full Pydantic validation
63
+ - **API-enforced** - Guaranteed schema compliance
64
+ - **Simple** - Natural language to structured data
65
+
66
+ ### 📄 Document Parsing
67
+
68
+ - **VisionParser** - PDF to Markdown using vision models
69
+ - **PyMuPDFParser** - Fast local text extraction
70
+ - **No external binaries** - Pure Python dependencies
71
+
72
+ ## API Reference
73
+
74
+ ### Extraction
75
+
76
+ ```python
77
+ SchemaExtractor(
78
+ user_description: str,
79
+ provider: Literal["openai", "anthropic", "google", "azure"] = "openai",
80
+ model: str | None = None,
81
+ api_key: str | None = None,
82
+ )
83
+ ```
84
+
85
+ **Methods and attributes:**
86
+ - `extract_one(text: str) -> dict` - Extract from single text
87
+ - `extract(texts: list[str]) -> list[dict]` - Batch extraction
88
+ - `field_names` - List of field names
89
+ - `model` - Generated Pydantic model
90
+
91
+ ### Vision Parser
92
+
93
+ ```python
94
+ VisionParser(
95
+ config: OpenAIConfig,
96
+ custom_prompt: str | None = None,
97
+ use_context: bool = True,
98
+ max_tokens: int = 16_000,
99
+ )
100
+ ```
101
+
102
+ **Methods:**
103
+ - `convert_pdf(pdf_path: str, dpi: int = 200, clean_output: bool = True) -> list[str]`
104
+ - `save_markdown(pages: list[str], output_path: str)`
105
+
106
+ **Config Helper:**
107
+ ```python
108
+ get_openai_config(use_azure: bool = True) -> OpenAIConfig
109
+ ```
110
+
111
+ ### PyMuPDF Parser
112
+
113
+ ```python
114
+ PyMuPDFParser()
115
+ ```
116
+
117
+ **Methods:**
118
+ - `parse_document(file_path: str) -> dict` - Extract text and metadata
119
+
120
+ ## Environment Variables
121
+
122
+ | Provider | Variables |
123
+ |----------|-----------|
124
+ | OpenAI | `OPENAI_API_KEY` |
125
+ | Anthropic | `ANTHROPIC_API_KEY` |
126
+ | Google | `GOOGLE_API_KEY` |
127
+ | Azure | `AZURE_API_KEY`, `AZURE_ENDPOINT`, `AZURE_DEPLOYMENT` |
128
+
129
+ ## Default Models
130
+
131
+ | Provider | Model |
132
+ |----------|-------|
133
+ | OpenAI | `gpt-4.1` |
134
+ | Anthropic | `claude-sonnet-4-5-20250929` |
135
+ | Google | `gemini-2.5-flash` |
136
+ | Azure | User's deployment |
137
+
138
+ ## Batch Processing
139
+
140
+ ```python
141
+ extractor = SchemaExtractor("""
142
+ Extract:
143
+ - invoice_number: Invoice ID
144
+ - amount: Total in USD
145
+ - vendor: Company name
146
+ """)
147
+
148
+ documents = [
149
+ "Invoice #12345 from Acme Corp. Total: $1,500",
150
+ "INV-67890, Supplier: TechCo, Amount: $2,750"
151
+ ]
152
+
153
+ results = extractor.extract(documents)
154
+ for result in results:
155
+ print(f"Invoice: {result['invoice_number']}, Amount: ${result['amount']}")
156
+ ```
157
+
158
+ ## Schema Inspection
159
+
160
+ ```python
161
+ extractor = SchemaExtractor("Extract name and age")
162
+
163
+ # Field names
164
+ print(extractor.field_names) # ['name', 'age']
165
+
166
+ # JSON schema
167
+ schema = extractor.model.model_json_schema()
168
+
169
+ # Field specs
170
+ for field in extractor.fields:
171
+ print(f"{field.field_name}: {field.field_type}")
172
+ ```
173
+
174
+ ## Advanced Usage
175
+
176
+ ### Custom Prompt for Vision Parser
177
+
178
+ ```python
179
+ custom_prompt = """
180
+ Convert document to markdown:
181
+ - Preserve all tables
182
+ - Include headers and footers
183
+ - Maintain layout structure
184
+ """
185
+
186
+ parser = VisionParser(config, custom_prompt=custom_prompt)
187
+ ```
188
+
189
+ ### Pre-defined Schema
190
+
191
+ ```python
192
+ from gaik.extract import ExtractionRequirements, FieldSpec, SchemaExtractor, create_extraction_model
193
+
194
+ requirements = ExtractionRequirements(
195
+ use_case_name="Invoice",
196
+ fields=[
197
+ FieldSpec("invoice_number", "str", "Invoice ID", required=True),
198
+ FieldSpec("amount", "float", "Total amount", required=True),
199
+ ]
200
+ )
201
+
202
+ InvoiceModel = create_extraction_model(requirements)
203
+ extractor = SchemaExtractor(requirements=requirements)
204
+ ```
205
+
206
+ ## Resources
207
+
208
+ - **Examples**: [examples/](https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/examples)
209
+ - **Repository**: [github.com/GAIK-project/gaik-toolkit](https://github.com/GAIK-project/gaik-toolkit)
210
+ - **Contributing**: [CONTRIBUTING.md](https://github.com/GAIK-project/gaik-toolkit/blob/main/CONTRIBUTING.md)
211
+
212
+ ## License
213
+
214
+ MIT - see [LICENSE](https://github.com/GAIK-project/gaik-toolkit/blob/main/LICENSE)
@@ -0,0 +1,132 @@
1
+ [project]
2
+ name = "gaik"
3
+ dynamic = ["version"]
4
+ description = "General AI Kit - Reusable AI/ML components for Python"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { file = "LICENSE" }
8
+ authors = [{ name = "GAIK Project" }]
9
+ keywords = [
10
+ "ai",
11
+ "ml",
12
+ "langchain",
13
+ "openai",
14
+ "anthropic",
15
+ "google",
16
+ "structured-outputs",
17
+ "pydantic",
18
+ "schema",
19
+ "extraction",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 3 - Alpha",
23
+ "Intended Audience :: Developers",
24
+ "License :: OSI Approved :: MIT License",
25
+ "Programming Language :: Python :: 3",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Software Development :: Libraries :: Python Modules",
30
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
31
+ ]
32
+
33
+ dependencies = [
34
+ # Core runtime requirement shared across all installs
35
+ "pydantic>=2.12.4",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ # Data extraction with all LLM providers (OpenAI, Anthropic, Google, Azure)
40
+ extract = [
41
+ # LangChain core plus every supported chat provider
42
+ "langchain-core>=1.0.3",
43
+ "langchain-openai>=1.0.2",
44
+ "langchain-anthropic>=1.0.1",
45
+ "langchain-google-genai>=3.0.1",
46
+ ]
47
+
48
+ # Document parsing (PDF, images, etc.)
49
+ parser = [
50
+ # Required to call OpenAI or Azure OpenAI vision endpoints
51
+ "openai>=2.7",
52
+ # PDF parsing and image conversion (no Pillow required)
53
+ "PyMuPDF>=1.23.0",
54
+ # Environment variable loading
55
+ "python-dotenv>=1.0.0",
56
+ ]
57
+
58
+ # All features
59
+ all = [
60
+ "gaik[extract]",
61
+ "gaik[parser]",
62
+ ]
63
+
64
+ # Development tooling for maintainers only (pip install -e ".[dev]")
65
+ dev = [
66
+ "ruff>=0.14.1", # Linter/formatter: ruff check . / ruff format .
67
+ "build>=1.0", # Package builder: python -m build
68
+ "twine>=4.0", # PyPI publisher: twine upload dist/*
69
+ "pytest>=8.0", # Test runner: pytest
70
+ "pytest-cov>=4.1", # Coverage reports (optional)
71
+ "pytest-mock>=3.12", # Mock fixtures for testing
72
+ "tomli>=2.0.1", # TOML parser for Python 3.10 tooling
73
+ ]
74
+
75
+ # Continuous integration dependencies (dev + provider extras)
76
+ ci = [
77
+ # Extraction extras
78
+ "langchain-core>=1.0.3",
79
+ "langchain-openai>=1.0.2",
80
+ "langchain-anthropic>=1.0.1",
81
+ "langchain-google-genai>=3.0.1",
82
+ # Development/test tooling
83
+ "ruff>=0.14.1",
84
+ "build>=1.0",
85
+ "twine>=4.0",
86
+ "pytest>=8.0",
87
+ "pytest-cov>=4.1",
88
+ "pytest-mock>=3.12",
89
+ "tomli>=2.0.1",
90
+ ]
91
+
92
+ [project.urls]
93
+ Homepage = "https://gaik.ai/"
94
+ Repository = "https://github.com/GAIK-project/gaik-toolkit"
95
+ Documentation = "https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/python/gaik"
96
+ Issues = "https://github.com/GAIK-project/gaik-toolkit/issues"
97
+
98
+ # Build system configuration (not installed, used during `python -m build`)
99
+ [build-system]
100
+ requires = ["setuptools>=61.0", "wheel", "setuptools-scm>=8.0"]
101
+ build-backend = "setuptools.build_meta"
102
+
103
+ # Source code location (not installed, directs setuptools behavior)
104
+ [tool.setuptools.packages.find]
105
+ where = ["src"]
106
+ exclude = ["gaik.tests", "gaik.*.tests"]
107
+ [tool.setuptools_scm]
108
+ tag_regex = "^v(?P<version>\\d+\\.\\d+\\.\\d+)$"
109
+ fallback_version = "0.0.0"
110
+ version_scheme = "no-guess-dev"
111
+ local_scheme = "no-local-version"
112
+
113
+
114
+ # Include type hints marker file (py.typed included in package)
115
+ [tool.setuptools.package-data]
116
+ gaik = ["py.typed"]
117
+
118
+ # Ruff linter/formatter settings (not installed, used when running `ruff check`)
119
+ [tool.ruff]
120
+ line-length = 100
121
+ target-version = "py310"
122
+ extend-exclude = ["scripts"] # Exclude CI/CD scripts from linting
123
+
124
+ # Ruff lint rules configuration
125
+ [tool.ruff.lint]
126
+ select = ["E", "F", "I", "N", "W", "UP"]
127
+ ignore = []
128
+
129
+ # Pytest test runner configuration
130
+ [tool.pytest.ini_options]
131
+ testpaths = ["src/gaik"]
132
+ addopts = ["-v", "--strict-markers"]
@@ -0,0 +1,28 @@
1
+ # CI/CD Scripts
2
+
3
+ This directory stores helper scripts that CI workflows call after building or installing the package. Keep them lightweight and dependency-free so they can run in both local and GitHub-hosted environments.
4
+
5
+ ## Available scripts
6
+
7
+ | Script | Purpose |
8
+ | ------------------------ | -------------------------------------------------------------------------- |
9
+ | `verify_installation.py` | Smoke test that imports and basic utilities work after `pip install gaik`. |
10
+
11
+ ### verify_installation.py
12
+
13
+ This script performs quick runtime checks without making network calls:
14
+
15
+ - Imports the public API (`gaik`, `gaik.extract`, `gaik.providers`).
16
+ - Instantiates a few Pydantic models (no LLM providers needed).
17
+ - Confirms required providers are registered.
18
+ - Prints a ✅ success message and exits 0.
19
+
20
+ **Used by:** `test.yml` (after unit tests) and `publish.yml` (after uploading to PyPI).
21
+
22
+ Run manually from the repo root:
23
+
24
+ ```bash
25
+ python packages/python/gaik/scripts/verify_installation.py
26
+ ```
27
+
28
+ > Unit tests now live next to the modules they cover (e.g., `src/gaik/extract/tests`). These scripts are only for CI smoke checks.