pyrtex 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyrtex-0.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 CaptainTrojan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pyrtex-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,224 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyrtex
3
+ Version: 0.1.2
4
+ Summary: A Python library for batch text extraction and processing using Google Cloud Vertex AI
5
+ Author-email: CaptainTrojan <your-email@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/CaptainTrojan/pyrtex
8
+ Project-URL: Repository, https://github.com/CaptainTrojan/pyrtex
9
+ Project-URL: Issues, https://github.com/CaptainTrojan/pyrtex/issues
10
+ Project-URL: Documentation, https://github.com/CaptainTrojan/pyrtex#readme
11
+ Project-URL: Changelog, https://github.com/CaptainTrojan/pyrtex/releases
12
+ Keywords: ai,vertex-ai,google-cloud,text-extraction,batch-processing,gemini,pydantic
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: pydantic>=2.0.0
25
+ Requires-Dist: pydantic-settings>=2.0.0
26
+ Requires-Dist: jinja2>=3.0.0
27
+ Requires-Dist: google-cloud-aiplatform>=1.40.0
28
+ Requires-Dist: google-cloud-storage>=2.10.0
29
+ Requires-Dist: google-cloud-bigquery>=3.11.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
32
+ Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
33
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
34
+ Requires-Dist: black>=23.0.0; extra == "dev"
35
+ Requires-Dist: isort>=5.12.0; extra == "dev"
36
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
37
+ Requires-Dist: bump2version>=1.0.0; extra == "dev"
38
+ Requires-Dist: build>=0.10.0; extra == "dev"
39
+ Requires-Dist: twine>=4.0.0; extra == "dev"
40
+ Dynamic: license-file
41
+
42
+ # PyRTex
43
+
44
+ [![CI](https://github.com/CaptainTrojan/pyrtex/actions/workflows/ci.yml/badge.svg)](https://github.com/CaptainTrojan/pyrtex/actions/workflows/ci.yml)
45
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
46
+
47
+ A simple Python library for batch text extraction and processing using Google Cloud Vertex AI.
48
+
49
+ PyRTex makes it easy to process multiple documents, images, or text snippets with Gemini models and get back structured, type-safe results using Pydantic models.
50
+
51
+ ## ✨ Features
52
+
53
+ - **🚀 Simple API**: Just 3 steps - configure, submit, get results
54
+ - **📦 Batch Processing**: Process multiple inputs efficiently
55
+ - **🔒 Type Safety**: Pydantic models for structured output
56
+ - **🎨 Flexible Templates**: Jinja2 templates for prompt engineering
57
+ - **☁️ GCP Integration**: Seamless Vertex AI and BigQuery integration
58
+ - **🧪 Testing Mode**: Simulate without GCP costs
59
+
60
+ ## 📦 Installation
61
+
62
+ Install from PyPI (recommended):
63
+ ```bash
64
+ pip install pyrtex
65
+ ```
66
+
67
+ Or install from source:
68
+ ```bash
69
+ git clone https://github.com/CaptainTrojan/pyrtex.git
70
+ cd pyrtex
71
+ pip install -e .
72
+ ```
73
+
74
+ For development:
75
+ ```bash
76
+ pip install -e .[dev]
77
+ ```
78
+
79
+ ## 🚀 Quick Start
80
+
81
+ ```python
82
+ from pydantic import BaseModel
83
+ from pyrtex import Job
84
+
85
+ # Define your data structures
86
+ class TextInput(BaseModel):
87
+ content: str
88
+
89
+ class Analysis(BaseModel):
90
+ summary: str
91
+ sentiment: str
92
+ key_points: list[str]
93
+
94
+ # Create a job
95
+ job = Job[Analysis](
96
+ model="gemini-2.0-flash-lite-001",
97
+ output_schema=Analysis,
98
+ prompt_template="Analyze this text: {{ content }}",
99
+ simulation_mode=True # Set to False for real processing
100
+ )
101
+
102
+ # Add your data
103
+ job.add_request("doc1", TextInput(content="Your text here"))
104
+ job.add_request("doc2", TextInput(content="Another document"))
105
+
106
+ # Process and get results
107
+ for result in job.submit().wait().results():
108
+ if result.was_successful:
109
+ print(f"Summary: {result.output.summary}")
110
+ print(f"Sentiment: {result.output.sentiment}")
111
+ else:
112
+ print(f"Error: {result.error}")
113
+ ```
114
+
115
+ ## 📋 Core Workflow
116
+
117
+ PyRTex uses a simple 3-step workflow:
118
+
119
+ ### 1. Configure & Add Data
120
+ ```python
121
+ job = Job[YourSchema](model="gemini-2.0-flash-lite-001", ...)
122
+ job.add_request("key1", YourModel(data="value1"))
123
+ job.add_request("key2", YourModel(data="value2"))
124
+ ```
125
+
126
+ ### 2. Submit & Wait
127
+ ```python
128
+ job.submit().wait() # Can be chained
129
+ ```
130
+
131
+ ### 3. Get Results
132
+ ```python
133
+ for result in job.results():
134
+ if result.was_successful:
135
+ # Use result.output (typed!)
136
+ else:
137
+ # Handle result.error
138
+ ```
139
+
140
+ ## ⚙️ Configuration
141
+
142
+ For production use, set your GCP project:
143
+
144
+ ```bash
145
+ export GOOGLE_PROJECT_ID="your-project-id"
146
+ ```
147
+
148
+ Then use `simulation_mode=False` for real processing.
149
+
150
+ ## 📚 Examples
151
+
152
+ The `examples/` directory contains complete working examples:
153
+
154
+ ```bash
155
+ cd examples
156
+
157
+ # Generate sample files
158
+ python generate_sample_data.py
159
+
160
+ # Extract contact info from business cards
161
+ python 01_simple_text_extraction.py
162
+
163
+ # Parse product catalogs
164
+ python 02_pdf_product_parsing.py
165
+
166
+ # Generate descriptions of images
167
+ python 03_image_description.py
168
+ ```
169
+
170
+ ### Example Use Cases
171
+
172
+ - **📇 Business Cards**: Extract contact information
173
+ - **📄 Documents**: Process PDFs, images (PNG, JPEG)
174
+ - **🛍️ Product Catalogs**: Parse pricing and inventory
175
+ - **🧾 Invoices**: Extract structured financial data
176
+ - **📊 Batch Processing**: Handle multiple files efficiently
177
+
178
+ ## 🧪 Development
179
+
180
+ ### Running Tests
181
+
182
+ ```bash
183
+ # All tests (mocked, safe)
184
+ ./test_runner.sh
185
+
186
+ # Specific test types
187
+ ./test_runner.sh --unit
188
+ ./test_runner.sh --integration
189
+ ./test_runner.sh --flake
190
+
191
+ # Real GCP tests (costs money!)
192
+ ./test_runner.sh --real --project-id your-project-id
193
+ ```
194
+
195
+ Windows users:
196
+ ```cmd
197
+ test_runner.bat --unit
198
+ test_runner.bat --flake
199
+ ```
200
+
201
+ ### Code Quality
202
+
203
+ - **flake8**: Linting
204
+ - **black**: Code formatting
205
+ - **isort**: Import sorting
206
+ - **pytest**: Testing with coverage
207
+
208
+ ## 🤝 Contributing
209
+
210
+ 1. Fork the repository
211
+ 2. Create a feature branch
212
+ 3. Make your changes
213
+ 4. Run tests: `./test_runner.sh`
214
+ 5. Submit a pull request
215
+
216
+ ## 📄 License
217
+
218
+ MIT License - see [LICENSE](LICENSE) for details.
219
+
220
+ ## 🆘 Support
221
+
222
+ - **Issues**: [GitHub Issues](https://github.com/CaptainTrojan/pyrtex/issues)
223
+ - **Examples**: Check the `examples/` directory
224
+ - **Testing**: Use `simulation_mode=True` for development
pyrtex-0.1.2/README.md ADDED
@@ -0,0 +1,183 @@
1
+ # PyRTex
2
+
3
+ [![CI](https://github.com/CaptainTrojan/pyrtex/actions/workflows/ci.yml/badge.svg)](https://github.com/CaptainTrojan/pyrtex/actions/workflows/ci.yml)
4
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
5
+
6
+ A simple Python library for batch text extraction and processing using Google Cloud Vertex AI.
7
+
8
+ PyRTex makes it easy to process multiple documents, images, or text snippets with Gemini models and get back structured, type-safe results using Pydantic models.
9
+
10
+ ## ✨ Features
11
+
12
+ - **🚀 Simple API**: Just 3 steps - configure, submit, get results
13
+ - **📦 Batch Processing**: Process multiple inputs efficiently
14
+ - **🔒 Type Safety**: Pydantic models for structured output
15
+ - **🎨 Flexible Templates**: Jinja2 templates for prompt engineering
16
+ - **☁️ GCP Integration**: Seamless Vertex AI and BigQuery integration
17
+ - **🧪 Testing Mode**: Simulate without GCP costs
18
+
19
+ ## 📦 Installation
20
+
21
+ Install from PyPI (recommended):
22
+ ```bash
23
+ pip install pyrtex
24
+ ```
25
+
26
+ Or install from source:
27
+ ```bash
28
+ git clone https://github.com/CaptainTrojan/pyrtex.git
29
+ cd pyrtex
30
+ pip install -e .
31
+ ```
32
+
33
+ For development:
34
+ ```bash
35
+ pip install -e .[dev]
36
+ ```
37
+
38
+ ## 🚀 Quick Start
39
+
40
+ ```python
41
+ from pydantic import BaseModel
42
+ from pyrtex import Job
43
+
44
+ # Define your data structures
45
+ class TextInput(BaseModel):
46
+ content: str
47
+
48
+ class Analysis(BaseModel):
49
+ summary: str
50
+ sentiment: str
51
+ key_points: list[str]
52
+
53
+ # Create a job
54
+ job = Job[Analysis](
55
+ model="gemini-2.0-flash-lite-001",
56
+ output_schema=Analysis,
57
+ prompt_template="Analyze this text: {{ content }}",
58
+ simulation_mode=True # Set to False for real processing
59
+ )
60
+
61
+ # Add your data
62
+ job.add_request("doc1", TextInput(content="Your text here"))
63
+ job.add_request("doc2", TextInput(content="Another document"))
64
+
65
+ # Process and get results
66
+ for result in job.submit().wait().results():
67
+ if result.was_successful:
68
+ print(f"Summary: {result.output.summary}")
69
+ print(f"Sentiment: {result.output.sentiment}")
70
+ else:
71
+ print(f"Error: {result.error}")
72
+ ```
73
+
74
+ ## 📋 Core Workflow
75
+
76
+ PyRTex uses a simple 3-step workflow:
77
+
78
+ ### 1. Configure & Add Data
79
+ ```python
80
+ job = Job[YourSchema](model="gemini-2.0-flash-lite-001", ...)
81
+ job.add_request("key1", YourModel(data="value1"))
82
+ job.add_request("key2", YourModel(data="value2"))
83
+ ```
84
+
85
+ ### 2. Submit & Wait
86
+ ```python
87
+ job.submit().wait() # Can be chained
88
+ ```
89
+
90
+ ### 3. Get Results
91
+ ```python
92
+ for result in job.results():
93
+ if result.was_successful:
94
+ # Use result.output (typed!)
95
+ else:
96
+ # Handle result.error
97
+ ```
98
+
99
+ ## ⚙️ Configuration
100
+
101
+ For production use, set your GCP project:
102
+
103
+ ```bash
104
+ export GOOGLE_PROJECT_ID="your-project-id"
105
+ ```
106
+
107
+ Then use `simulation_mode=False` for real processing.
108
+
109
+ ## 📚 Examples
110
+
111
+ The `examples/` directory contains complete working examples:
112
+
113
+ ```bash
114
+ cd examples
115
+
116
+ # Generate sample files
117
+ python generate_sample_data.py
118
+
119
+ # Extract contact info from business cards
120
+ python 01_simple_text_extraction.py
121
+
122
+ # Parse product catalogs
123
+ python 02_pdf_product_parsing.py
124
+
125
+ # Generate descriptions of images
126
+ python 03_image_description.py
127
+ ```
128
+
129
+ ### Example Use Cases
130
+
131
+ - **📇 Business Cards**: Extract contact information
132
+ - **📄 Documents**: Process PDFs, images (PNG, JPEG)
133
+ - **🛍️ Product Catalogs**: Parse pricing and inventory
134
+ - **🧾 Invoices**: Extract structured financial data
135
+ - **📊 Batch Processing**: Handle multiple files efficiently
136
+
137
+ ## 🧪 Development
138
+
139
+ ### Running Tests
140
+
141
+ ```bash
142
+ # All tests (mocked, safe)
143
+ ./test_runner.sh
144
+
145
+ # Specific test types
146
+ ./test_runner.sh --unit
147
+ ./test_runner.sh --integration
148
+ ./test_runner.sh --flake
149
+
150
+ # Real GCP tests (costs money!)
151
+ ./test_runner.sh --real --project-id your-project-id
152
+ ```
153
+
154
+ Windows users:
155
+ ```cmd
156
+ test_runner.bat --unit
157
+ test_runner.bat --flake
158
+ ```
159
+
160
+ ### Code Quality
161
+
162
+ - **flake8**: Linting
163
+ - **black**: Code formatting
164
+ - **isort**: Import sorting
165
+ - **pytest**: Testing with coverage
166
+
167
+ ## 🤝 Contributing
168
+
169
+ 1. Fork the repository
170
+ 2. Create a feature branch
171
+ 3. Make your changes
172
+ 4. Run tests: `./test_runner.sh`
173
+ 5. Submit a pull request
174
+
175
+ ## 📄 License
176
+
177
+ MIT License - see [LICENSE](LICENSE) for details.
178
+
179
+ ## 🆘 Support
180
+
181
+ - **Issues**: [GitHub Issues](https://github.com/CaptainTrojan/pyrtex/issues)
182
+ - **Examples**: Check the `examples/` directory
183
+ - **Testing**: Use `simulation_mode=True` for development
@@ -0,0 +1,152 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pyrtex"
7
+ version = "0.1.2"
8
+ description = "A Python library for batch text extraction and processing using Google Cloud Vertex AI"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ keywords = ["ai", "vertex-ai", "google-cloud", "text-extraction", "batch-processing", "gemini", "pydantic"]
13
+ authors = [
14
+ {name = "CaptainTrojan", email = "your-email@example.com"},
15
+ ]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ ]
26
+ dependencies = [
27
+ "pydantic>=2.0.0",
28
+ "pydantic-settings>=2.0.0",
29
+ "jinja2>=3.0.0",
30
+ "google-cloud-aiplatform>=1.40.0",
31
+ "google-cloud-storage>=2.10.0",
32
+ "google-cloud-bigquery>=3.11.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = [
37
+ "pytest>=7.0.0",
38
+ "pytest-mock>=3.10.0",
39
+ "pytest-cov>=4.0.0",
40
+ "black>=23.0.0",
41
+ "isort>=5.12.0",
42
+ "flake8>=6.0.0",
43
+ "bump2version>=1.0.0",
44
+ "build>=0.10.0",
45
+ "twine>=4.0.0",
46
+ ]
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/CaptainTrojan/pyrtex"
50
+ Repository = "https://github.com/CaptainTrojan/pyrtex"
51
+ Issues = "https://github.com/CaptainTrojan/pyrtex/issues"
52
+ Documentation = "https://github.com/CaptainTrojan/pyrtex#readme"
53
+ Changelog = "https://github.com/CaptainTrojan/pyrtex/releases"
54
+
55
+ [tool.setuptools.packages.find]
56
+ where = ["src"]
57
+
58
+ [tool.setuptools.package-dir]
59
+ "" = "src"
60
+
61
+ [tool.pytest.ini_options]
62
+ testpaths = ["tests"]
63
+ markers = [
64
+ "incurs_costs: marks tests that will incur small GCP costs",
65
+ "slow: marks tests that take a long time to run",
66
+ ]
67
+ addopts = [
68
+ "--strict-markers",
69
+ "--strict-config",
70
+ "--cov=src/pyrtex",
71
+ "--cov-report=term-missing",
72
+ "--cov-report=html",
73
+ ]
74
+ filterwarnings = [
75
+ "ignore::DeprecationWarning",
76
+ "ignore::PendingDeprecationWarning",
77
+ ]
78
+
79
+ [tool.coverage.run]
80
+ source = ["src/pyrtex"]
81
+ omit = [
82
+ "*/tests/*",
83
+ "*/test_*",
84
+ ]
85
+
86
+ [tool.coverage.report]
87
+ exclude_lines = [
88
+ "pragma: no cover",
89
+ "def __repr__",
90
+ "raise AssertionError",
91
+ "raise NotImplementedError",
92
+ "if __name__ == .__main__.:",
93
+ "if TYPE_CHECKING:",
94
+ ]
95
+
96
+ [tool.black]
97
+ line-length = 88
98
+ target-version = ['py38']
99
+ include = '\.pyi?$'
100
+ extend-exclude = '''
101
+ /(
102
+ # directories
103
+ \.eggs
104
+ | \.git
105
+ | \.hg
106
+ | \.mypy_cache
107
+ | \.tox
108
+ | \.venv
109
+ | build
110
+ | dist
111
+ )/
112
+ '''
113
+
114
+ [tool.flake8]
115
+ max-line-length = 88
116
+ extend-ignore = [
117
+ "E203", # whitespace before ':' (conflicts with black)
118
+ "E501", # line too long (handled by black)
119
+ "W503", # line break before binary operator (conflicts with black)
120
+ ]
121
+ exclude = [
122
+ ".git",
123
+ "__pycache__",
124
+ "build",
125
+ "dist",
126
+ ".eggs",
127
+ "*.egg-info",
128
+ ".venv",
129
+ ".tox",
130
+ ]
131
+
132
+ [tool.isort]
133
+ profile = "black"
134
+ multi_line_output = 3
135
+ line_length = 88
136
+ known_first_party = ["pyrtex"]
137
+ src_paths = ["src", "tests"]
138
+
139
+ [tool.mypy]
140
+ python_version = "3.8"
141
+ warn_return_any = true
142
+ warn_unused_configs = true
143
+ disallow_untyped_defs = true
144
+ disallow_incomplete_defs = true
145
+ check_untyped_defs = true
146
+ disallow_untyped_decorators = true
147
+ no_implicit_optional = true
148
+ warn_redundant_casts = true
149
+ warn_unused_ignores = true
150
+ warn_no_return = true
151
+ warn_unreachable = true
152
+ strict_equality = true
pyrtex-0.1.2/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
+ # src/pyrtex/__init__.py
2
+
3
+ """
4
+ Pyrtex - A Python library for batch text extraction and processing using
5
+ Google Cloud Vertex AI.
6
+
7
+ This library provides a simple interface for submitting batch jobs to Google
8
+ Cloud Vertex AI for text extraction, document processing, and structured data
9
+ extraction tasks.
10
+ """
11
+
12
+ from .client import Job
13
+ from .config import GenerationConfig, InfrastructureConfig
14
+ from .exceptions import ConfigurationError, JobFailedError
15
+ from .models import BatchResult, T
16
+
17
+ __version__ = "0.1.2"
18
+ __all__ = [
19
+ "Job",
20
+ "InfrastructureConfig",
21
+ "GenerationConfig",
22
+ "BatchResult",
23
+ "T",
24
+ "ConfigurationError",
25
+ "JobFailedError",
26
+ ]