gaik 0.2.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaik-0.2.17/.gitignore ADDED
@@ -0,0 +1 @@
1
+ src/gaik/_version.py
gaik-0.2.17/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gaik-0.2.17/PKG-INFO ADDED
@@ -0,0 +1,291 @@
1
+ Metadata-Version: 2.4
2
+ Name: gaik
3
+ Version: 0.2.17
4
+ Summary: General AI Kit - Reusable AI/ML components for Python
5
+ Author: GAIK Project
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 GAIK - GenAI for knowledge mgt
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://gaik.ai/
29
+ Project-URL: Repository, https://github.com/GAIK-project/gaik-toolkit
30
+ Project-URL: Documentation, https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/python/gaik
31
+ Project-URL: Issues, https://github.com/GAIK-project/gaik-toolkit/issues
32
+ Keywords: ai,ml,langchain,openai,anthropic,google,structured-outputs,pydantic,schema,extraction
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: pydantic>=2.12.4
46
+ Provides-Extra: extract
47
+ Provides-Extra: parser
48
+ Requires-Dist: openai>=2.7; extra == "parser"
49
+ Requires-Dist: PyMuPDF>=1.23.0; extra == "parser"
50
+ Requires-Dist: python-dotenv>=1.0.0; extra == "parser"
51
+ Requires-Dist: docling; extra == "parser"
52
+ Requires-Dist: psutil; extra == "parser"
53
+ Provides-Extra: all
54
+ Requires-Dist: gaik[extract]; extra == "all"
55
+ Requires-Dist: gaik[parser]; extra == "all"
56
+ Provides-Extra: dev
57
+ Requires-Dist: ruff>=0.14.1; extra == "dev"
58
+ Requires-Dist: build>=1.0; extra == "dev"
59
+ Requires-Dist: twine>=4.0; extra == "dev"
60
+ Requires-Dist: pytest>=8.0; extra == "dev"
61
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
62
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
63
+ Requires-Dist: tomli>=2.0.1; extra == "dev"
64
+ Provides-Extra: ci
65
+ Requires-Dist: langchain-core>=1.0.3; extra == "ci"
66
+ Requires-Dist: langchain-openai>=1.0.2; extra == "ci"
67
+ Requires-Dist: langchain-anthropic>=1.0.1; extra == "ci"
68
+ Requires-Dist: langchain-google-genai>=3.0.1; extra == "ci"
69
+ Requires-Dist: ruff>=0.14.1; extra == "ci"
70
+ Requires-Dist: build>=1.0; extra == "ci"
71
+ Requires-Dist: twine>=4.0; extra == "ci"
72
+ Requires-Dist: pytest>=8.0; extra == "ci"
73
+ Requires-Dist: pytest-cov>=4.1; extra == "ci"
74
+ Requires-Dist: pytest-mock>=3.12; extra == "ci"
75
+ Requires-Dist: tomli>=2.0.1; extra == "ci"
76
+ Dynamic: license-file
77
+
78
+ # GAIK - General AI Kit
79
+
80
+ Multi-provider AI toolkit for Python with structured data extraction and document parsing.
81
+
82
+ ## Installation
83
+
84
+ ```bash
85
+ # Extraction features (OpenAI, Anthropic, Google, Azure)
86
+ pip install "gaik[extract]"
87
+
88
+ # PDF parsing
89
+ pip install "gaik[parser]"
90
+
91
+ # All features
92
+ pip install "gaik[all]"
93
+ ```
94
+
95
+ ## Quick Start
96
+
97
+ ### Extract Data
98
+
99
+ ```python
100
+ from gaik.extract import SchemaExtractor
101
+
102
+ # Set API key first: export OPENAI_API_KEY='sk-...'
103
+ extractor = SchemaExtractor("Extract name and age from text")
104
+ result = extractor.extract_one("Alice is 25 years old")
105
+ print(result) # {'name': 'Alice', 'age': 25}
106
+
107
+ # Switch provider
108
+ extractor = SchemaExtractor("Extract name and age", provider="anthropic") # or "google", "azure"
109
+ ```
110
+
111
+ ### Parse PDF to Markdown
112
+
113
+ ```python
114
+ from gaik.parsers import VisionParser, get_openai_config
115
+
116
+ # Set environment: AZURE_API_KEY, AZURE_ENDPOINT, AZURE_DEPLOYMENT
117
+ config = get_openai_config(use_azure=True)
118
+ parser = VisionParser(config)
119
+
120
+ pages = parser.convert_pdf("invoice.pdf", clean_output=True)
121
+ markdown = "\n\n".join(pages)
122
+ ```
123
+
124
+ ### Fast Local PDF Parsing
125
+
126
+ ```python
127
+ from gaik.parsers import PyMuPDFParser
128
+
129
+ parser = PyMuPDFParser()
130
+ result = parser.parse_document("document.pdf")
131
+ print(result["text_content"])
132
+ ```
133
+
134
+ ## Features
135
+
136
+ ### 🔍 Structured Data Extraction
137
+
138
+ - **Multi-provider** - OpenAI, Anthropic, Google, Azure
139
+ - **Type-safe** - Full Pydantic validation
140
+ - **API-enforced** - Guaranteed schema compliance
141
+ - **Simple** - Natural language to structured data
142
+
143
+ ### 📄 Document Parsing
144
+
145
+ - **VisionParser** - PDF to Markdown using vision models
146
+ - **PyMuPDFParser** - Fast local text extraction
147
+ - **No external binaries** - Pure Python dependencies
148
+
149
+ ## API Reference
150
+
151
+ ### Extraction
152
+
153
+ ```python
154
+ SchemaExtractor(
155
+ user_description: str,
156
+ provider: Literal["openai", "anthropic", "google", "azure"] = "openai",
157
+ model: str | None = None,
158
+ api_key: str | None = None,
159
+ )
160
+ ```
161
+
162
+ **Methods:**
163
+ - `extract_one(text: str) -> dict` - Extract from single text
164
+ - `extract(texts: list[str]) -> list[dict]` - Batch extraction
165
+ - `field_names` - List of field names
166
+ - `model` - Generated Pydantic model
167
+
168
+ ### Vision Parser
169
+
170
+ ```python
171
+ VisionParser(
172
+ config: OpenAIConfig,
173
+ custom_prompt: str | None = None,
174
+ use_context: bool = True,
175
+ max_tokens: int = 16_000,
176
+ )
177
+ ```
178
+
179
+ **Methods:**
180
+ - `convert_pdf(pdf_path: str, dpi: int = 200, clean_output: bool = True) -> list[str]`
181
+ - `save_markdown(pages: list[str], output_path: str)`
182
+
183
+ **Config Helper:**
184
+ ```python
185
+ get_openai_config(use_azure: bool = True) -> OpenAIConfig
186
+ ```
187
+
188
+ ### PyMuPDF Parser
189
+
190
+ ```python
191
+ PyMuPDFParser()
192
+ ```
193
+
194
+ **Methods:**
195
+ - `parse_document(file_path: str) -> dict` - Extract text and metadata
196
+
197
+ ## Environment Variables
198
+
199
+ | Provider | Variables |
200
+ |----------|-----------|
201
+ | OpenAI | `OPENAI_API_KEY` |
202
+ | Anthropic | `ANTHROPIC_API_KEY` |
203
+ | Google | `GOOGLE_API_KEY` |
204
+ | Azure | `AZURE_API_KEY`, `AZURE_ENDPOINT`, `AZURE_DEPLOYMENT` |
205
+
206
+ ## Default Models
207
+
208
+ | Provider | Model |
209
+ |----------|-------|
210
+ | OpenAI | `gpt-4.1` |
211
+ | Anthropic | `claude-sonnet-4-5-20250929` |
212
+ | Google | `gemini-2.5-flash` |
213
+ | Azure | User's deployment |
214
+
215
+ ## Batch Processing
216
+
217
+ ```python
218
+ extractor = SchemaExtractor("""
219
+ Extract:
220
+ - invoice_number: Invoice ID
221
+ - amount: Total in USD
222
+ - vendor: Company name
223
+ """)
224
+
225
+ documents = [
226
+ "Invoice #12345 from Acme Corp. Total: $1,500",
227
+ "INV-67890, Supplier: TechCo, Amount: $2,750"
228
+ ]
229
+
230
+ results = extractor.extract(documents)
231
+ for result in results:
232
+ print(f"Invoice: {result['invoice_number']}, Amount: ${result['amount']}")
233
+ ```
234
+
235
+ ## Schema Inspection
236
+
237
+ ```python
238
+ extractor = SchemaExtractor("Extract name and age")
239
+
240
+ # Field names
241
+ print(extractor.field_names) # ['name', 'age']
242
+
243
+ # JSON schema
244
+ schema = extractor.model.model_json_schema()
245
+
246
+ # Field specs
247
+ for field in extractor.fields:
248
+ print(f"{field.field_name}: {field.field_type}")
249
+ ```
250
+
251
+ ## Advanced Usage
252
+
253
+ ### Custom Prompt for Vision Parser
254
+
255
+ ```python
256
+ custom_prompt = """
257
+ Convert document to markdown:
258
+ - Preserve all tables
259
+ - Include headers and footers
260
+ - Maintain layout structure
261
+ """
262
+
263
+ parser = VisionParser(config, custom_prompt=custom_prompt)
264
+ ```
265
+
266
+ ### Pre-defined Schema
267
+
268
+ ```python
269
+ from gaik.extract import ExtractionRequirements, FieldSpec, SchemaExtractor, create_extraction_model
270
+
271
+ requirements = ExtractionRequirements(
272
+ use_case_name="Invoice",
273
+ fields=[
274
+ FieldSpec("invoice_number", "str", "Invoice ID", required=True),
275
+ FieldSpec("amount", "float", "Total amount", required=True),
276
+ ]
277
+ )
278
+
279
+ InvoiceModel = create_extraction_model(requirements)
280
+ extractor = SchemaExtractor(requirements=requirements)
281
+ ```
282
+
283
+ ## Resources
284
+
285
+ - **Examples**: [examples/](https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/examples)
286
+ - **Repository**: [github.com/GAIK-project/gaik-toolkit](https://github.com/GAIK-project/gaik-toolkit)
287
+ - **Contributing**: [CONTRIBUTING.md](https://github.com/GAIK-project/gaik-toolkit/blob/main/CONTRIBUTING.md)
288
+
289
+ ## License
290
+
291
+ MIT - see [LICENSE](https://github.com/GAIK-project/gaik-toolkit/blob/main/LICENSE)
gaik-0.2.17/README.md ADDED
@@ -0,0 +1,214 @@
1
+ # GAIK - General AI Kit
2
+
3
+ Multi-provider AI toolkit for Python with structured data extraction and document parsing.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # Extraction features (OpenAI, Anthropic, Google, Azure)
9
+ pip install "gaik[extract]"
10
+
11
+ # PDF parsing
12
+ pip install "gaik[parser]"
13
+
14
+ # All features
15
+ pip install "gaik[all]"
16
+ ```
17
+
18
+ ## Quick Start
19
+
20
+ ### Extract Data
21
+
22
+ ```python
23
+ from gaik.extract import SchemaExtractor
24
+
25
+ # Set API key first: export OPENAI_API_KEY='sk-...'
26
+ extractor = SchemaExtractor("Extract name and age from text")
27
+ result = extractor.extract_one("Alice is 25 years old")
28
+ print(result) # {'name': 'Alice', 'age': 25}
29
+
30
+ # Switch provider
31
+ extractor = SchemaExtractor("Extract name and age", provider="anthropic") # or "google", "azure"
32
+ ```
33
+
34
+ ### Parse PDF to Markdown
35
+
36
+ ```python
37
+ from gaik.parsers import VisionParser, get_openai_config
38
+
39
+ # Set environment: AZURE_API_KEY, AZURE_ENDPOINT, AZURE_DEPLOYMENT
40
+ config = get_openai_config(use_azure=True)
41
+ parser = VisionParser(config)
42
+
43
+ pages = parser.convert_pdf("invoice.pdf", clean_output=True)
44
+ markdown = "\n\n".join(pages)
45
+ ```
46
+
47
+ ### Fast Local PDF Parsing
48
+
49
+ ```python
50
+ from gaik.parsers import PyMuPDFParser
51
+
52
+ parser = PyMuPDFParser()
53
+ result = parser.parse_document("document.pdf")
54
+ print(result["text_content"])
55
+ ```
56
+
57
+ ## Features
58
+
59
+ ### 🔍 Structured Data Extraction
60
+
61
+ - **Multi-provider** - OpenAI, Anthropic, Google, Azure
62
+ - **Type-safe** - Full Pydantic validation
63
+ - **API-enforced** - Guaranteed schema compliance
64
+ - **Simple** - Natural language to structured data
65
+
66
+ ### 📄 Document Parsing
67
+
68
+ - **VisionParser** - PDF to Markdown using vision models
69
+ - **PyMuPDFParser** - Fast local text extraction
70
+ - **No external binaries** - Pure Python dependencies
71
+
72
+ ## API Reference
73
+
74
+ ### Extraction
75
+
76
+ ```python
77
+ SchemaExtractor(
78
+ user_description: str,
79
+ provider: Literal["openai", "anthropic", "google", "azure"] = "openai",
80
+ model: str | None = None,
81
+ api_key: str | None = None,
82
+ )
83
+ ```
84
+
85
+ **Methods:**
86
+ - `extract_one(text: str) -> dict` - Extract from single text
87
+ - `extract(texts: list[str]) -> list[dict]` - Batch extraction
88
+ - `field_names` - List of field names
89
+ - `model` - Generated Pydantic model
90
+
91
+ ### Vision Parser
92
+
93
+ ```python
94
+ VisionParser(
95
+ config: OpenAIConfig,
96
+ custom_prompt: str | None = None,
97
+ use_context: bool = True,
98
+ max_tokens: int = 16_000,
99
+ )
100
+ ```
101
+
102
+ **Methods:**
103
+ - `convert_pdf(pdf_path: str, dpi: int = 200, clean_output: bool = True) -> list[str]`
104
+ - `save_markdown(pages: list[str], output_path: str)`
105
+
106
+ **Config Helper:**
107
+ ```python
108
+ get_openai_config(use_azure: bool = True) -> OpenAIConfig
109
+ ```
110
+
111
+ ### PyMuPDF Parser
112
+
113
+ ```python
114
+ PyMuPDFParser()
115
+ ```
116
+
117
+ **Methods:**
118
+ - `parse_document(file_path: str) -> dict` - Extract text and metadata
119
+
120
+ ## Environment Variables
121
+
122
+ | Provider | Variables |
123
+ |----------|-----------|
124
+ | OpenAI | `OPENAI_API_KEY` |
125
+ | Anthropic | `ANTHROPIC_API_KEY` |
126
+ | Google | `GOOGLE_API_KEY` |
127
+ | Azure | `AZURE_API_KEY`, `AZURE_ENDPOINT`, `AZURE_DEPLOYMENT` |
128
+
129
+ ## Default Models
130
+
131
+ | Provider | Model |
132
+ |----------|-------|
133
+ | OpenAI | `gpt-4.1` |
134
+ | Anthropic | `claude-sonnet-4-5-20250929` |
135
+ | Google | `gemini-2.5-flash` |
136
+ | Azure | User's deployment |
137
+
138
+ ## Batch Processing
139
+
140
+ ```python
141
+ extractor = SchemaExtractor("""
142
+ Extract:
143
+ - invoice_number: Invoice ID
144
+ - amount: Total in USD
145
+ - vendor: Company name
146
+ """)
147
+
148
+ documents = [
149
+ "Invoice #12345 from Acme Corp. Total: $1,500",
150
+ "INV-67890, Supplier: TechCo, Amount: $2,750"
151
+ ]
152
+
153
+ results = extractor.extract(documents)
154
+ for result in results:
155
+ print(f"Invoice: {result['invoice_number']}, Amount: ${result['amount']}")
156
+ ```
157
+
158
+ ## Schema Inspection
159
+
160
+ ```python
161
+ extractor = SchemaExtractor("Extract name and age")
162
+
163
+ # Field names
164
+ print(extractor.field_names) # ['name', 'age']
165
+
166
+ # JSON schema
167
+ schema = extractor.model.model_json_schema()
168
+
169
+ # Field specs
170
+ for field in extractor.fields:
171
+ print(f"{field.field_name}: {field.field_type}")
172
+ ```
173
+
174
+ ## Advanced Usage
175
+
176
+ ### Custom Prompt for Vision Parser
177
+
178
+ ```python
179
+ custom_prompt = """
180
+ Convert document to markdown:
181
+ - Preserve all tables
182
+ - Include headers and footers
183
+ - Maintain layout structure
184
+ """
185
+
186
+ parser = VisionParser(config, custom_prompt=custom_prompt)
187
+ ```
188
+
189
+ ### Pre-defined Schema
190
+
191
+ ```python
192
+ from gaik.extract import ExtractionRequirements, FieldSpec, SchemaExtractor, create_extraction_model
193
+
194
+ requirements = ExtractionRequirements(
195
+ use_case_name="Invoice",
196
+ fields=[
197
+ FieldSpec("invoice_number", "str", "Invoice ID", required=True),
198
+ FieldSpec("amount", "float", "Total amount", required=True),
199
+ ]
200
+ )
201
+
202
+ InvoiceModel = create_extraction_model(requirements)
203
+ extractor = SchemaExtractor(requirements=requirements)
204
+ ```
205
+
206
+ ## Resources
207
+
208
+ - **Examples**: [examples/](https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/examples)
209
+ - **Repository**: [github.com/GAIK-project/gaik-toolkit](https://github.com/GAIK-project/gaik-toolkit)
210
+ - **Contributing**: [CONTRIBUTING.md](https://github.com/GAIK-project/gaik-toolkit/blob/main/CONTRIBUTING.md)
211
+
212
+ ## License
213
+
214
+ MIT - see [LICENSE](https://github.com/GAIK-project/gaik-toolkit/blob/main/LICENSE)
gaik-0.2.17/pyproject.toml ADDED
@@ -0,0 +1,131 @@
1
+ [project]
2
+ name = "gaik"
3
+ dynamic = ["version"]
4
+ description = "General AI Kit - Reusable AI/ML components for Python"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { file = "LICENSE" }
8
+ authors = [{ name = "GAIK Project" }]
9
+ keywords = [
10
+ "ai",
11
+ "ml",
12
+ "langchain",
13
+ "openai",
14
+ "anthropic",
15
+ "google",
16
+ "structured-outputs",
17
+ "pydantic",
18
+ "schema",
19
+ "extraction",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 3 - Alpha",
23
+ "Intended Audience :: Developers",
24
+ "License :: OSI Approved :: MIT License",
25
+ "Programming Language :: Python :: 3",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Software Development :: Libraries :: Python Modules",
30
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
31
+ ]
32
+
33
+ dependencies = [
34
+ # Core runtime requirement shared across all installs
35
+ "pydantic>=2.12.4",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
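+ # Data extraction with all LLM providers (OpenAI, Anthropic, Google, Azure). NOTE(review): this extra is currently empty, so `pip install "gaik[extract]"` adds no provider packages; the LangChain provider dependencies are listed only under `ci` - confirm whether they belong here.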
40
+ extract = [
41
+ ]
42
+
43
+ # Document parsing (PDF, images, etc.)
44
+ parser = [
45
+ # Required to call OpenAI or Azure OpenAI vision endpoints
46
+ "openai>=2.7",
47
+ # PDF parsing and image conversion (no Pillow required)
48
+ "PyMuPDF>=1.23.0",
49
+ # Environment variable loading
50
+ "python-dotenv>=1.0.0",
51
+ "docling",
52
+ "psutil"
53
+ ]
54
+
55
+ # All features
56
+ all = [
57
+ "gaik[extract]",
58
+ "gaik[parser]",
59
+ ]
60
+
61
+ # Development tooling for maintainers only (pip install -e ".[dev]")
62
+ dev = [
63
+ "ruff>=0.14.1", # Linter/formatter: ruff check . / ruff format .
64
+ "build>=1.0", # Package builder: python -m build
65
+ "twine>=4.0", # PyPI publisher: twine upload dist/*
66
+ "pytest>=8.0", # Test runner: pytest
67
+ "pytest-cov>=4.1", # Coverage reports (optional)
68
+ "pytest-mock>=3.12", # Mock fixtures for testing
69
+ "tomli>=2.0.1", # TOML parser for Python 3.10 tooling
70
+ ]
71
+
72
+ # Continuous integration dependencies (dev + provider extras)
73
+ ci = [
74
+ # Extraction extras
75
+ "langchain-core>=1.0.3",
76
+ "langchain-openai>=1.0.2",
77
+ "langchain-anthropic>=1.0.1",
78
+ "langchain-google-genai>=3.0.1",
79
+ # Development/test tooling
80
+ "ruff>=0.14.1",
81
+ "build>=1.0",
82
+ "twine>=4.0",
83
+ "pytest>=8.0",
84
+ "pytest-cov>=4.1",
85
+ "pytest-mock>=3.12",
86
+ "tomli>=2.0.1",
87
+ ]
88
+
89
+ [project.urls]
90
+ Homepage = "https://gaik.ai/"
91
+ Repository = "https://github.com/GAIK-project/gaik-toolkit"
92
+ Documentation = "https://github.com/GAIK-project/gaik-toolkit/tree/main/packages/python/gaik"
93
+ Issues = "https://github.com/GAIK-project/gaik-toolkit/issues"
94
+
95
+ # Build system configuration (not installed, used during `python -m build`)
96
+ [build-system]
97
+ requires = ["setuptools>=61.0", "wheel", "setuptools-scm>=8.0"]
98
+ build-backend = "setuptools.build_meta"
99
+
100
+ # Source code location (not installed, directs setuptools behavior)
101
+ [tool.setuptools.packages.find]
102
+ where = ["src"]
103
+ exclude = ["gaik.tests", "gaik.*.tests"]
104
+ [tool.setuptools_scm]
105
+ version_file = "src/gaik/_version.py"
106
+ root = "../../.."
107
+ tag_regex = "^v(?P<version>\\d+\\.\\d+\\.\\d+)$"
108
+ fallback_version = "0.0.0"
109
+ version_scheme = "no-guess-dev"
110
+ local_scheme = "no-local-version"
111
+
112
+
113
+ # Ship the `py.typed` marker file so type checkers recognize the package's inline type hints (PEP 561)
114
+ [tool.setuptools.package-data]
115
+ gaik = ["py.typed"]
116
+
117
+ # Ruff linter/formatter settings (not installed, used when running `ruff check`)
118
+ [tool.ruff]
119
+ line-length = 100
120
+ target-version = "py310"
121
+ extend-exclude = ["scripts"] # Exclude CI/CD scripts from linting
122
+
123
+ # Ruff lint rules configuration
124
+ [tool.ruff.lint]
125
+ select = ["E", "F", "I", "N", "W", "UP"]
126
+ ignore = []
127
+
128
+ # Pytest test runner configuration
129
+ [tool.pytest.ini_options]
130
+ testpaths = ["src/gaik"]
131
+ addopts = ["-v", "--strict-markers"]
gaik-0.2.17/scripts/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # CI/CD Scripts
2
+
3
+ This directory stores helper scripts that CI workflows call after building or installing the package. Keep them lightweight and dependency-free so they can run in both local and GitHub-hosted environments.
4
+
5
+ ## Available scripts
6
+
7
+ | Script | Purpose |
8
+ | ------------------------ | -------------------------------------------------------------------------- |
9
+ | `verify_installation.py` | Smoke test that imports and basic utilities work after `pip install gaik`. |
10
+
11
+ ### verify_installation.py
12
+
13
+ This script performs quick runtime checks without making network calls:
14
+
15
+ - Imports the public API (`gaik`, `gaik.extract`, `gaik.providers`).
16
+ - Instantiates a few Pydantic models (no LLM providers needed).
17
+ - Confirms required providers are registered.
18
+ - Prints a ✅ success message and exits 0.
19
+
20
+ **Used by:** `test.yml` (after unit tests) and `publish.yml` (after uploading to PyPI).
21
+
22
+ Run manually from the repo root:
23
+
24
+ ```bash
25
+ python packages/python/gaik/scripts/verify_installation.py
26
+ ```
27
+
28
+ > Unit tests now live next to the modules they cover (e.g., `src/gaik/extract/tests`). These scripts are only for CI smoke checks.