moltres 0.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. moltres-0.18.0/PKG-INFO +378 -0
  2. moltres-0.18.0/README.md +306 -0
  3. moltres-0.18.0/pyproject.toml +181 -0
  4. moltres-0.18.0/setup.cfg +4 -0
  5. moltres-0.18.0/src/moltres/__init__.py +312 -0
  6. moltres-0.18.0/src/moltres/config.py +202 -0
  7. moltres-0.18.0/src/moltres/dataframe/__init__.py +0 -0
  8. moltres-0.18.0/src/moltres/dataframe/async_dataframe.py +1867 -0
  9. moltres-0.18.0/src/moltres/dataframe/async_groupby.py +422 -0
  10. moltres-0.18.0/src/moltres/dataframe/async_pandas_dataframe.py +1422 -0
  11. moltres-0.18.0/src/moltres/dataframe/async_pandas_groupby.py +408 -0
  12. moltres-0.18.0/src/moltres/dataframe/async_pandas_indexers.py +117 -0
  13. moltres-0.18.0/src/moltres/dataframe/async_polars_dataframe.py +1582 -0
  14. moltres-0.18.0/src/moltres/dataframe/async_polars_groupby.py +385 -0
  15. moltres-0.18.0/src/moltres/dataframe/async_reader.py +557 -0
  16. moltres-0.18.0/src/moltres/dataframe/async_writer.py +996 -0
  17. moltres-0.18.0/src/moltres/dataframe/create_dataframe.py +620 -0
  18. moltres-0.18.0/src/moltres/dataframe/dataframe.py +2715 -0
  19. moltres-0.18.0/src/moltres/dataframe/groupby.py +429 -0
  20. moltres-0.18.0/src/moltres/dataframe/pandas_column.py +142 -0
  21. moltres-0.18.0/src/moltres/dataframe/pandas_dataframe.py +1446 -0
  22. moltres-0.18.0/src/moltres/dataframe/pandas_groupby.py +341 -0
  23. moltres-0.18.0/src/moltres/dataframe/pandas_string_accessor.py +208 -0
  24. moltres-0.18.0/src/moltres/dataframe/polars_column.py +151 -0
  25. moltres-0.18.0/src/moltres/dataframe/polars_dataframe.py +1650 -0
  26. moltres-0.18.0/src/moltres/dataframe/polars_datetime_accessor.py +144 -0
  27. moltres-0.18.0/src/moltres/dataframe/polars_groupby.py +295 -0
  28. moltres-0.18.0/src/moltres/dataframe/polars_string_accessor.py +195 -0
  29. moltres-0.18.0/src/moltres/dataframe/reader.py +656 -0
  30. moltres-0.18.0/src/moltres/dataframe/readers/__init__.py +24 -0
  31. moltres-0.18.0/src/moltres/dataframe/readers/async_csv_reader.py +491 -0
  32. moltres-0.18.0/src/moltres/dataframe/readers/async_json_reader.py +392 -0
  33. moltres-0.18.0/src/moltres/dataframe/readers/async_parquet_reader.py +192 -0
  34. moltres-0.18.0/src/moltres/dataframe/readers/async_readers_init.py +8 -0
  35. moltres-0.18.0/src/moltres/dataframe/readers/async_text_reader.py +176 -0
  36. moltres-0.18.0/src/moltres/dataframe/readers/compression.py +132 -0
  37. moltres-0.18.0/src/moltres/dataframe/readers/csv_reader.py +489 -0
  38. moltres-0.18.0/src/moltres/dataframe/readers/json_reader.py +643 -0
  39. moltres-0.18.0/src/moltres/dataframe/readers/parquet_reader.py +181 -0
  40. moltres-0.18.0/src/moltres/dataframe/readers/schema_inference.py +195 -0
  41. moltres-0.18.0/src/moltres/dataframe/readers/text_reader.py +175 -0
  42. moltres-0.18.0/src/moltres/dataframe/writer.py +1190 -0
  43. moltres-0.18.0/src/moltres/engine/__init__.py +22 -0
  44. moltres-0.18.0/src/moltres/engine/async_connection.py +253 -0
  45. moltres-0.18.0/src/moltres/engine/async_execution.py +332 -0
  46. moltres-0.18.0/src/moltres/engine/connection.py +117 -0
  47. moltres-0.18.0/src/moltres/engine/dialects.py +30 -0
  48. moltres-0.18.0/src/moltres/engine/execution.py +304 -0
  49. moltres-0.18.0/src/moltres/expressions/__init__.py +273 -0
  50. moltres-0.18.0/src/moltres/expressions/column.py +311 -0
  51. moltres-0.18.0/src/moltres/expressions/expr.py +46 -0
  52. moltres-0.18.0/src/moltres/expressions/functions.py +3726 -0
  53. moltres-0.18.0/src/moltres/expressions/sql_parser.py +451 -0
  54. moltres-0.18.0/src/moltres/expressions/when.py +64 -0
  55. moltres-0.18.0/src/moltres/expressions/window.py +203 -0
  56. moltres-0.18.0/src/moltres/io/__init__.py +0 -0
  57. moltres-0.18.0/src/moltres/io/read.py +49 -0
  58. moltres-0.18.0/src/moltres/io/records.py +1367 -0
  59. moltres-0.18.0/src/moltres/io/write.py +33 -0
  60. moltres-0.18.0/src/moltres/logical/__init__.py +0 -0
  61. moltres-0.18.0/src/moltres/logical/operators.py +438 -0
  62. moltres-0.18.0/src/moltres/logical/plan.py +299 -0
  63. moltres-0.18.0/src/moltres/py.typed +0 -0
  64. moltres-0.18.0/src/moltres/sql/__init__.py +0 -0
  65. moltres-0.18.0/src/moltres/sql/builders.py +59 -0
  66. moltres-0.18.0/src/moltres/sql/compiler.py +3154 -0
  67. moltres-0.18.0/src/moltres/sql/ddl.py +726 -0
  68. moltres-0.18.0/src/moltres/table/__init__.py +0 -0
  69. moltres-0.18.0/src/moltres/table/actions.py +393 -0
  70. moltres-0.18.0/src/moltres/table/async_actions.py +425 -0
  71. moltres-0.18.0/src/moltres/table/async_mutations.py +306 -0
  72. moltres-0.18.0/src/moltres/table/async_table.py +1358 -0
  73. moltres-0.18.0/src/moltres/table/batch.py +95 -0
  74. moltres-0.18.0/src/moltres/table/mutations.py +309 -0
  75. moltres-0.18.0/src/moltres/table/schema.py +328 -0
  76. moltres-0.18.0/src/moltres/table/sqlalchemy_integration.py +530 -0
  77. moltres-0.18.0/src/moltres/table/table.py +1485 -0
  78. moltres-0.18.0/src/moltres/utils/__init__.py +0 -0
  79. moltres-0.18.0/src/moltres/utils/exceptions.py +322 -0
  80. moltres-0.18.0/src/moltres/utils/health.py +280 -0
  81. moltres-0.18.0/src/moltres/utils/inspector.py +613 -0
  82. moltres-0.18.0/src/moltres/utils/optional_deps.py +219 -0
  83. moltres-0.18.0/src/moltres/utils/retry.py +241 -0
  84. moltres-0.18.0/src/moltres/utils/telemetry.py +255 -0
  85. moltres-0.18.0/src/moltres/utils/typing.py +10 -0
  86. moltres-0.18.0/src/moltres/utils/validation.py +43 -0
  87. moltres-0.18.0/src/moltres.egg-info/PKG-INFO +378 -0
  88. moltres-0.18.0/src/moltres.egg-info/SOURCES.txt +91 -0
  89. moltres-0.18.0/src/moltres.egg-info/dependency_links.txt +1 -0
  90. moltres-0.18.0/src/moltres.egg-info/requires.txt +52 -0
  91. moltres-0.18.0/src/moltres.egg-info/top_level.txt +1 -0
  92. moltres-0.18.0/tests/test_connection_validation.py +75 -0
  93. moltres-0.18.0/tests/test_imports.py +5 -0
@@ -0,0 +1,378 @@
1
+ Metadata-Version: 2.4
2
+ Name: moltres
3
+ Version: 0.18.0
4
+ Summary: DataFrame API with SQL pushdown execution and real SQL CRUD - the missing layer for SQL in Python
5
+ Author-email: Odos Matthews <odosmatthews@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/eddiethedean/moltres
8
+ Project-URL: Repository, https://github.com/eddiethedean/moltres
9
+ Project-URL: Issues, https://github.com/eddiethedean/moltres/issues
10
+ Keywords: dataframe,sql,crud,pushdown,etl,data-engineering,sqlalchemy,pandas,polars,spark,database,query-builder
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Operating System :: MacOS :: MacOS X
21
+ Classifier: Operating System :: Microsoft :: Windows
22
+ Classifier: Operating System :: POSIX :: Linux
23
+ Classifier: Topic :: Database
24
+ Classifier: Topic :: Software Development :: Libraries
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/markdown
27
+ Requires-Dist: SQLAlchemy>=2.0
28
+ Requires-Dist: typing-extensions>=4.5
29
+ Provides-Extra: polars
30
+ Requires-Dist: polars>=1.0; extra == "polars"
31
+ Provides-Extra: pandas
32
+ Requires-Dist: pandas>=2.1; extra == "pandas"
33
+ Provides-Extra: async
34
+ Requires-Dist: aiofiles>=23.0; extra == "async"
35
+ Requires-Dist: greenlet>=3.0.0; extra == "async"
36
+ Provides-Extra: async-postgresql
37
+ Requires-Dist: aiofiles>=23.0; extra == "async-postgresql"
38
+ Requires-Dist: asyncpg>=0.29.0; extra == "async-postgresql"
39
+ Requires-Dist: greenlet>=3.0.0; extra == "async-postgresql"
40
+ Provides-Extra: async-mysql
41
+ Requires-Dist: aiofiles>=23.0; extra == "async-mysql"
42
+ Requires-Dist: aiomysql>=0.2.0; extra == "async-mysql"
43
+ Requires-Dist: greenlet>=3.0.0; extra == "async-mysql"
44
+ Provides-Extra: async-sqlite
45
+ Requires-Dist: aiofiles>=23.0; extra == "async-sqlite"
46
+ Requires-Dist: aiosqlite>=0.19.0; extra == "async-sqlite"
47
+ Requires-Dist: greenlet>=3.0.0; extra == "async-sqlite"
48
+ Provides-Extra: dev
49
+ Requires-Dist: pytest>=8.0; extra == "dev"
50
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
51
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
52
+ Requires-Dist: pytest-green-light>=0.2.0; extra == "dev"
53
+ Requires-Dist: pytest-xdist>=3.5; extra == "dev"
54
+ Requires-Dist: mypy>=1.8; extra == "dev"
55
+ Requires-Dist: sphinx>=7.0; extra == "dev"
56
+ Requires-Dist: sphinx-rtd-theme>=2.0; extra == "dev"
57
+ Requires-Dist: ruff>=0.6; extra == "dev"
58
+ Requires-Dist: pre-commit>=3.5; extra == "dev"
59
+ Requires-Dist: pandas>=2.1; extra == "dev"
60
+ Requires-Dist: pandas-stubs>=2.1; extra == "dev"
61
+ Requires-Dist: polars>=1.0; extra == "dev"
62
+ Requires-Dist: pyarrow>=10.0; extra == "dev"
63
+ Requires-Dist: aiofiles>=23.0; extra == "dev"
64
+ Requires-Dist: aiosqlite>=0.19.0; extra == "dev"
65
+ Requires-Dist: asyncpg>=0.29.0; extra == "dev"
66
+ Requires-Dist: greenlet>=3.0.0; extra == "dev"
67
+ Requires-Dist: testing.postgresql>=1.3.0; extra == "dev"
68
+ Requires-Dist: testing.mysqld>=1.4.0; extra == "dev"
69
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "dev"
70
+ Requires-Dist: pymysql>=1.0.0; extra == "dev"
71
+ Requires-Dist: duckdb-engine>=0.9.0; extra == "dev"
72
+
73
+ # Moltres
74
+
75
+ <div align="center">
76
+
77
+ [![CI](https://github.com/eddiethedean/moltres/actions/workflows/ci.yml/badge.svg)](https://github.com/eddiethedean/moltres/actions/workflows/ci.yml)
78
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://github.com/eddiethedean/moltres)
79
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/eddiethedean/moltres/blob/main/LICENSE)
80
+ [![Code style: ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
81
+
82
+ **The Missing DataFrame Layer for SQL in Python**
83
+
84
+ **MOLTRES**: **M**odern **O**perations **L**ayer for **T**ransformations, **R**elational **E**xecution, and **S**QL
85
+
86
+ [Installation](#-installation) • [Quick Start](#-quick-start) • [Documentation](#-documentation) • [Examples](#-examples)
87
+
88
+ </div>
89
+
90
+ ---
91
+
92
+ **Moltres** combines a DataFrame API (like Pandas/Polars), SQL pushdown execution (no data loading into memory), and real SQL CRUD operations (INSERT, UPDATE, DELETE) in one unified interface.
93
+
94
+ Transform millions of rows using familiar DataFrame operations—all executed directly in SQL without materializing data. Update, insert, and delete with column-aware, type-safe operations.
95
+
96
+ ## ✨ Features
97
+
98
+ - 🚀 **PySpark-Style DataFrame API** - Primary API with familiar operations (select, filter, join, groupBy, etc.) for seamless migration from PySpark
99
+ - 🐼 **Optional Pandas-Style Interface** - Comprehensive Pandas-like API with string accessor, query(), dtypes, shape, pivot, sample, concat, and more
100
+ - 🦀 **Optional Polars-Style Interface** - Polars LazyFrame-like API with expression-based operations, set operations, file I/O, CTEs, and more
101
+ - 🎯 **98% PySpark API Compatibility** - Near-complete compatibility for seamless migration
102
+ - 🗄️ **SQL Pushdown Execution** - All operations compile to SQL and run on your database—no data loading into memory
103
+ - ✏️ **Real SQL CRUD** - INSERT, UPDATE, DELETE operations with DataFrame-style syntax
104
+ - 📊 **Multiple Formats** - Read/write CSV, JSON, JSONL, Parquet, and more
105
+ - 🐼 **Pandas & Polars Integration** - Pass pandas/polars DataFrames directly to moltres operations
106
+ - 🌊 **Streaming Support** - Handle datasets larger than memory with chunked processing
107
+ - ⚡ **Async Support** - Full async/await support for all operations
108
+ - 🔒 **Security First** - Built-in SQL injection prevention and validation
109
+
110
+ ## 📦 Installation
111
+
112
+ ```bash
113
+ pip install moltres
114
+
115
+ # Optional: For async support
116
+ pip install moltres[async-postgresql] # PostgreSQL
117
+ pip install moltres[async-mysql] # MySQL
118
+ pip install moltres[async-sqlite] # SQLite
119
+
120
+ # Optional: For pandas/polars result formats
121
+ pip install moltres[pandas,polars]
122
+ ```
123
+
124
+ ## 🚀 Quick Start
125
+
126
+ ### Basic DataFrame Operations
127
+
128
+ ```python
129
+ from moltres import col, connect
130
+ from moltres.expressions import functions as F
131
+
132
+ # Connect to your database
133
+ db = connect("sqlite:///example.db")
134
+
135
+ # DataFrame operations with SQL pushdown (no data loading into memory)
136
+ df = (
137
+ db.table("orders")
138
+ .select()
139
+ .join(db.table("customers").select(), on=[col("orders.customer_id") == col("customers.id")])
140
+ .where(col("active") == True)
141
+ .group_by("country")
142
+ .agg(F.sum(col("amount")).alias("total_amount"))
143
+ )
144
+
145
+ # Execute and get results (SQL is compiled and executed here)
146
+ results = df.collect() # Returns list of dicts by default
147
+ ```
148
+
149
+ ### Pandas-Style Interface
150
+
151
+ ```python
152
+ df = db.table("users").pandas()
153
+
154
+ # Pandas-style operations
155
+ df[['id', 'name']] # Select columns
156
+ df.query('age > 25 and country == "USA"') # Query with AND/OR
157
+ df['name'].str.upper() # String accessor
158
+ df.groupby('country').agg(age='mean') # GroupBy
159
+ ```
160
+
161
+ 📚 **[See the Pandas Interface Guide →](guides/09-pandas-interface.md)**
162
+
163
+ ### Polars-Style Interface
164
+
165
+ ```python
166
+ df = db.table("users").polars()
167
+
168
+ # Polars-style operations
169
+ df.select("id", "name", (col("age") * 2).alias("double_age"))
170
+ df.filter((col("age") > 25) & (col("country") == "USA"))
171
+ df.group_by("country").agg(F.sum(col("age")))
172
+ ```
173
+
174
+ 📚 **[See the Polars Interface Guide →](guides/10-polars-interface.md)**
175
+
176
+ ### CRUD Operations
177
+
178
+ ```python
179
+ from moltres.io.records import Records
180
+
181
+ # Insert rows
182
+ Records.from_list([
183
+ {"id": 1, "name": "Alice", "email": "alice@example.com"},
184
+ {"id": 2, "name": "Bob", "email": "bob@example.com"},
185
+ ], database=db).insert_into("users")
186
+
187
+ # Update rows
188
+ db.update("users", where=col("active") == 0, set={"active": 1})
189
+
190
+ # Delete rows
191
+ db.delete("users", where=col("email").is_null())
192
+ ```
193
+
194
+ 📚 **[See CRUD Operations Guide →](guides/05-common-patterns.md#data-mutations)**
195
+
196
+ ## 📖 Documentation
197
+
198
+ ### Getting Started
199
+ - **[Getting Started Guide](guides/01-getting-started.md)** - Step-by-step introduction
200
+ - **[Examples Directory](examples/)** - 19 comprehensive example files
201
+ - **[Examples Guide](docs/EXAMPLES.md)** - Common patterns and use cases
202
+
203
+ ### Interface Guides
204
+ - **[Pandas Interface](guides/09-pandas-interface.md)** - Complete pandas-style API reference
205
+ - **[Polars Interface](guides/10-polars-interface.md)** - Complete Polars-style API reference
206
+ - **[PySpark Migration](guides/03-migrating-from-pyspark.md)** - Migrating from PySpark
207
+
208
+ ### Core Topics
209
+ - **[Reading Data](guides/01-getting-started.md#reading-data)** - Tables, SQL, files
210
+ - **[Writing Data](guides/01-getting-started.md#writing-data)** - Tables, files, formats
211
+ - **[Table Management](guides/01-getting-started.md#table-management)** - Create, drop, constraints
212
+ - **[Schema Inspection](guides/01-getting-started.md#schema-inspection)** - Reflection and inspection
213
+ - **[Streaming](guides/04-performance-optimization.md#streaming)** - Large dataset handling
214
+ - **[Async Operations](guides/07-advanced-topics.md#async-support)** - Async/await support
215
+
216
+ ### Advanced Topics
217
+ - **[Performance Optimization](guides/04-performance-optimization.md)** - Query optimization and best practices
218
+ - **[Error Handling](guides/06-error-handling.md)** - Exception handling and debugging
219
+ - **[Best Practices](guides/08-best-practices.md)** - Production-ready patterns
220
+ - **[Advanced Topics](guides/07-advanced-topics.md)** - Window functions, CTEs, transactions
221
+
222
+ ### Reference
223
+ - **[Why Moltres?](docs/WHY_MOLTRES.md)** - Understanding the gap Moltres fills
224
+ - **[Security Guide](docs/SECURITY.md)** - Security best practices
225
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
226
+ - **[API Reference](docs/api/)** - Complete API documentation
227
+
228
+ ## 📚 Examples
229
+
230
+ Comprehensive examples demonstrating all Moltres features:
231
+
232
+ - **[01_connecting.py](examples/01_connecting.py)** - Database connections (sync and async)
233
+ - **[02_dataframe_basics.py](examples/02_dataframe_basics.py)** - Basic DataFrame operations
234
+ - **[03_async_dataframe.py](examples/03_async_dataframe.py)** - Asynchronous operations
235
+ - **[04_joins.py](examples/04_joins.py)** - Join operations
236
+ - **[05_groupby.py](examples/05_groupby.py)** - GroupBy and aggregation
237
+ - **[06_expressions.py](examples/06_expressions.py)** - Column expressions and functions
238
+ - **[07_file_reading.py](examples/07_file_reading.py)** - Reading files (CSV, JSON, Parquet)
239
+ - **[08_file_writing.py](examples/08_file_writing.py)** - Writing DataFrames to files
240
+ - **[09_table_operations.py](examples/09_table_operations.py)** - Table operations and mutations
241
+ - **[10_create_dataframe.py](examples/10_create_dataframe.py)** - Creating DataFrames from Python data
242
+ - **[11_window_functions.py](examples/11_window_functions.py)** - Window functions
243
+ - **[12_sql_operations.py](examples/12_sql_operations.py)** - Raw SQL and SQL operations
244
+ - **[13_transactions.py](examples/13_transactions.py)** - Transaction management
245
+ - **[14_reflection.py](examples/14_reflection.py)** - Schema inspection and reflection
246
+ - **[15_pandas_polars_dataframes.py](examples/15_pandas_polars_dataframes.py)** - Pandas/Polars integration
247
+ - **[16_ux_features.py](examples/16_ux_features.py)** - UX improvements
248
+ - **[17_sqlalchemy_models.py](examples/17_sqlalchemy_models.py)** - SQLAlchemy ORM integration
249
+ - **[18_pandas_interface.py](examples/18_pandas_interface.py)** - Pandas-style interface examples
250
+ - **[19_polars_interface.py](examples/19_polars_interface.py)** - Polars-style interface examples
251
+
252
+ See the [examples directory](examples/) for all example files.
253
+
254
+ ## 🛠️ Supported Operations
255
+
256
+ ### DataFrame Operations
257
+ - `select()` / `selectExpr()` - Project columns or SQL expressions
258
+ - `where()` / `filter()` - Filter rows
259
+ - `join()` - Join with other DataFrames
260
+ - `group_by()` / `groupBy()` - Group rows
261
+ - `agg()` - Aggregate functions
262
+ - `order_by()` / `orderBy()` / `sort()` - Sort rows
263
+ - `limit()` - Limit number of rows
264
+ - `distinct()` - Remove duplicate rows
265
+ - `withColumn()` - Add or replace columns
266
+ - `pivot()` - Pivot operations
267
+ - `explode()` - Explode array/JSON columns
268
+
269
+ ### Column Expressions
270
+ - **Arithmetic**: `+`, `-`, `*`, `/`, `%`
271
+ - **Comparisons**: `==`, `!=`, `<`, `>`, `<=`, `>=`
272
+ - **Boolean**: `&`, `|`, `~`
273
+ - **Functions**: 130+ functions including mathematical, string, date/time, aggregate, window, array, JSON, and utility functions
274
+ - **Window Functions**: `over()`, `partition_by()`, `order_by()` - Full PySpark compatibility
275
+
276
+ 📚 **[See Expressions Guide →](examples/06_expressions.py)**
277
+
278
+ ### Supported SQL Dialects
279
+ - ✅ **SQLite** - Full support
280
+ - ✅ **PostgreSQL** - Full support with dialect-specific optimizations
281
+ - ✅ **MySQL** - Full support with dialect-specific optimizations
282
+ - ✅ **DuckDB** - Full support with PostgreSQL-compatible optimizations
283
+ - ✅ **Other SQLAlchemy-supported databases** - ANSI SQL fallback
284
+
285
+ ## 🧪 Development
286
+
287
+ ### Setup
288
+
289
+ ```bash
290
+ # Clone the repository
291
+ git clone https://github.com/eddiethedean/moltres.git
292
+ cd moltres
293
+
294
+ # Install in development mode
295
+ pip install -e ".[dev]"
296
+
297
+ # Install pre-commit hooks
298
+ pre-commit install
299
+ ```
300
+
301
+ ### Running Tests
302
+
303
+ ```bash
304
+ # Run all tests
305
+ pytest
306
+
307
+ # Run tests in parallel
308
+ pytest -n 9
309
+
310
+ # Run with coverage
311
+ pytest --cov=src/moltres --cov-report=html
312
+ ```
313
+
314
+ ### Code Quality
315
+
316
+ ```bash
317
+ # Linting
318
+ ruff check .
319
+
320
+ # Formatting
321
+ ruff format .
322
+
323
+ # Type checking (strict mode enabled)
324
+ mypy src
325
+ ```
326
+
327
+ ### Pre-Commit CI Checks
328
+
329
+ ```bash
330
+ # Run all CI checks (linting, type checking, tests)
331
+ make ci-check
332
+
333
+ # Quick linting check only
334
+ make ci-check-lint
335
+ ```
336
+
337
+ ## 🤝 Contributing
338
+
339
+ Contributions are welcome! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for guidelines.
340
+
341
+ **Quick Start:**
342
+ 1. Fork the repository
343
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
344
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
345
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
346
+ 5. Open a Pull Request
347
+
348
+ **Before submitting:**
349
+ - Run tests: `pytest`
350
+ - Check code quality: `ruff check . && mypy src`
351
+ - Update documentation if needed
352
+
353
+ ## 👤 Author
354
+
355
+ **Odos Matthews**
356
+
357
+ - GitHub: [@eddiethedean](https://github.com/eddiethedean)
358
+ - Email: odosmatthews@gmail.com
359
+
360
+ ## 🙏 Acknowledgments
361
+
362
+ - Inspired by PySpark's DataFrame API style, but focused on SQL feature support rather than PySpark feature parity
363
+ - Built on SQLAlchemy for database connectivity and SQL compilation
364
+ - Thanks to all contributors and users
365
+
366
+ ## 📄 License
367
+
368
+ MIT License - see [LICENSE](LICENSE) file for details.
369
+
370
+ ---
371
+
372
+ <div align="center">
373
+
374
+ **Made with ❤️ for the Python data community**
375
+
376
+ [⬆ Back to Top](#moltres)
377
+
378
+ </div>
@@ -0,0 +1,306 @@
1
+ # Moltres
2
+
3
+ <div align="center">
4
+
5
+ [![CI](https://github.com/eddiethedean/moltres/actions/workflows/ci.yml/badge.svg)](https://github.com/eddiethedean/moltres/actions/workflows/ci.yml)
6
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://github.com/eddiethedean/moltres)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/eddiethedean/moltres/blob/main/LICENSE)
8
+ [![Code style: ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
9
+
10
+ **The Missing DataFrame Layer for SQL in Python**
11
+
12
+ **MOLTRES**: **M**odern **O**perations **L**ayer for **T**ransformations, **R**elational **E**xecution, and **S**QL
13
+
14
+ [Installation](#-installation) • [Quick Start](#-quick-start) • [Documentation](#-documentation) • [Examples](#-examples)
15
+
16
+ </div>
17
+
18
+ ---
19
+
20
+ **Moltres** combines a DataFrame API (like Pandas/Polars), SQL pushdown execution (no data loading into memory), and real SQL CRUD operations (INSERT, UPDATE, DELETE) in one unified interface.
21
+
22
+ Transform millions of rows using familiar DataFrame operations—all executed directly in SQL without materializing data. Update, insert, and delete with column-aware, type-safe operations.
23
+
24
+ ## ✨ Features
25
+
26
+ - 🚀 **PySpark-Style DataFrame API** - Primary API with familiar operations (select, filter, join, groupBy, etc.) for seamless migration from PySpark
27
+ - 🐼 **Optional Pandas-Style Interface** - Comprehensive Pandas-like API with string accessor, query(), dtypes, shape, pivot, sample, concat, and more
28
+ - 🦀 **Optional Polars-Style Interface** - Polars LazyFrame-like API with expression-based operations, set operations, file I/O, CTEs, and more
29
+ - 🎯 **98% PySpark API Compatibility** - Near-complete compatibility for seamless migration
30
+ - 🗄️ **SQL Pushdown Execution** - All operations compile to SQL and run on your database—no data loading into memory
31
+ - ✏️ **Real SQL CRUD** - INSERT, UPDATE, DELETE operations with DataFrame-style syntax
32
+ - 📊 **Multiple Formats** - Read/write CSV, JSON, JSONL, Parquet, and more
33
+ - 🐼 **Pandas & Polars Integration** - Pass pandas/polars DataFrames directly to moltres operations
34
+ - 🌊 **Streaming Support** - Handle datasets larger than memory with chunked processing
35
+ - ⚡ **Async Support** - Full async/await support for all operations
36
+ - 🔒 **Security First** - Built-in SQL injection prevention and validation
37
+
38
+ ## 📦 Installation
39
+
40
+ ```bash
41
+ pip install moltres
42
+
43
+ # Optional: For async support
44
+ pip install moltres[async-postgresql] # PostgreSQL
45
+ pip install moltres[async-mysql] # MySQL
46
+ pip install moltres[async-sqlite] # SQLite
47
+
48
+ # Optional: For pandas/polars result formats
49
+ pip install moltres[pandas,polars]
50
+ ```
51
+
52
+ ## 🚀 Quick Start
53
+
54
+ ### Basic DataFrame Operations
55
+
56
+ ```python
57
+ from moltres import col, connect
58
+ from moltres.expressions import functions as F
59
+
60
+ # Connect to your database
61
+ db = connect("sqlite:///example.db")
62
+
63
+ # DataFrame operations with SQL pushdown (no data loading into memory)
64
+ df = (
65
+ db.table("orders")
66
+ .select()
67
+ .join(db.table("customers").select(), on=[col("orders.customer_id") == col("customers.id")])
68
+ .where(col("active") == True)
69
+ .group_by("country")
70
+ .agg(F.sum(col("amount")).alias("total_amount"))
71
+ )
72
+
73
+ # Execute and get results (SQL is compiled and executed here)
74
+ results = df.collect() # Returns list of dicts by default
75
+ ```
76
+
77
+ ### Pandas-Style Interface
78
+
79
+ ```python
80
+ df = db.table("users").pandas()
81
+
82
+ # Pandas-style operations
83
+ df[['id', 'name']] # Select columns
84
+ df.query('age > 25 and country == "USA"') # Query with AND/OR
85
+ df['name'].str.upper() # String accessor
86
+ df.groupby('country').agg(age='mean') # GroupBy
87
+ ```
88
+
89
+ 📚 **[See the Pandas Interface Guide →](guides/09-pandas-interface.md)**
90
+
91
+ ### Polars-Style Interface
92
+
93
+ ```python
94
+ df = db.table("users").polars()
95
+
96
+ # Polars-style operations
97
+ df.select("id", "name", (col("age") * 2).alias("double_age"))
98
+ df.filter((col("age") > 25) & (col("country") == "USA"))
99
+ df.group_by("country").agg(F.sum(col("age")))
100
+ ```
101
+
102
+ 📚 **[See the Polars Interface Guide →](guides/10-polars-interface.md)**
103
+
104
+ ### CRUD Operations
105
+
106
+ ```python
107
+ from moltres.io.records import Records
108
+
109
+ # Insert rows
110
+ Records.from_list([
111
+ {"id": 1, "name": "Alice", "email": "alice@example.com"},
112
+ {"id": 2, "name": "Bob", "email": "bob@example.com"},
113
+ ], database=db).insert_into("users")
114
+
115
+ # Update rows
116
+ db.update("users", where=col("active") == 0, set={"active": 1})
117
+
118
+ # Delete rows
119
+ db.delete("users", where=col("email").is_null())
120
+ ```
121
+
122
+ 📚 **[See CRUD Operations Guide →](guides/05-common-patterns.md#data-mutations)**
123
+
124
+ ## 📖 Documentation
125
+
126
+ ### Getting Started
127
+ - **[Getting Started Guide](guides/01-getting-started.md)** - Step-by-step introduction
128
+ - **[Examples Directory](examples/)** - 19 comprehensive example files
129
+ - **[Examples Guide](docs/EXAMPLES.md)** - Common patterns and use cases
130
+
131
+ ### Interface Guides
132
+ - **[Pandas Interface](guides/09-pandas-interface.md)** - Complete pandas-style API reference
133
+ - **[Polars Interface](guides/10-polars-interface.md)** - Complete Polars-style API reference
134
+ - **[PySpark Migration](guides/03-migrating-from-pyspark.md)** - Migrating from PySpark
135
+
136
+ ### Core Topics
137
+ - **[Reading Data](guides/01-getting-started.md#reading-data)** - Tables, SQL, files
138
+ - **[Writing Data](guides/01-getting-started.md#writing-data)** - Tables, files, formats
139
+ - **[Table Management](guides/01-getting-started.md#table-management)** - Create, drop, constraints
140
+ - **[Schema Inspection](guides/01-getting-started.md#schema-inspection)** - Reflection and inspection
141
+ - **[Streaming](guides/04-performance-optimization.md#streaming)** - Large dataset handling
142
+ - **[Async Operations](guides/07-advanced-topics.md#async-support)** - Async/await support
143
+
144
+ ### Advanced Topics
145
+ - **[Performance Optimization](guides/04-performance-optimization.md)** - Query optimization and best practices
146
+ - **[Error Handling](guides/06-error-handling.md)** - Exception handling and debugging
147
+ - **[Best Practices](guides/08-best-practices.md)** - Production-ready patterns
148
+ - **[Advanced Topics](guides/07-advanced-topics.md)** - Window functions, CTEs, transactions
149
+
150
+ ### Reference
151
+ - **[Why Moltres?](docs/WHY_MOLTRES.md)** - Understanding the gap Moltres fills
152
+ - **[Security Guide](docs/SECURITY.md)** - Security best practices
153
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
154
+ - **[API Reference](docs/api/)** - Complete API documentation
155
+
156
+ ## 📚 Examples
157
+
158
+ Comprehensive examples demonstrating all Moltres features:
159
+
160
+ - **[01_connecting.py](examples/01_connecting.py)** - Database connections (sync and async)
161
+ - **[02_dataframe_basics.py](examples/02_dataframe_basics.py)** - Basic DataFrame operations
162
+ - **[03_async_dataframe.py](examples/03_async_dataframe.py)** - Asynchronous operations
163
+ - **[04_joins.py](examples/04_joins.py)** - Join operations
164
+ - **[05_groupby.py](examples/05_groupby.py)** - GroupBy and aggregation
165
+ - **[06_expressions.py](examples/06_expressions.py)** - Column expressions and functions
166
+ - **[07_file_reading.py](examples/07_file_reading.py)** - Reading files (CSV, JSON, Parquet)
167
+ - **[08_file_writing.py](examples/08_file_writing.py)** - Writing DataFrames to files
168
+ - **[09_table_operations.py](examples/09_table_operations.py)** - Table operations and mutations
169
+ - **[10_create_dataframe.py](examples/10_create_dataframe.py)** - Creating DataFrames from Python data
170
+ - **[11_window_functions.py](examples/11_window_functions.py)** - Window functions
171
+ - **[12_sql_operations.py](examples/12_sql_operations.py)** - Raw SQL and SQL operations
172
+ - **[13_transactions.py](examples/13_transactions.py)** - Transaction management
173
+ - **[14_reflection.py](examples/14_reflection.py)** - Schema inspection and reflection
174
+ - **[15_pandas_polars_dataframes.py](examples/15_pandas_polars_dataframes.py)** - Pandas/Polars integration
175
+ - **[16_ux_features.py](examples/16_ux_features.py)** - UX improvements
176
+ - **[17_sqlalchemy_models.py](examples/17_sqlalchemy_models.py)** - SQLAlchemy ORM integration
177
+ - **[18_pandas_interface.py](examples/18_pandas_interface.py)** - Pandas-style interface examples
178
+ - **[19_polars_interface.py](examples/19_polars_interface.py)** - Polars-style interface examples
179
+
180
+ See the [examples directory](examples/) for all example files.
181
+
182
+ ## 🛠️ Supported Operations
183
+
184
+ ### DataFrame Operations
185
+ - `select()` / `selectExpr()` - Project columns or SQL expressions
186
+ - `where()` / `filter()` - Filter rows
187
+ - `join()` - Join with other DataFrames
188
+ - `group_by()` / `groupBy()` - Group rows
189
+ - `agg()` - Aggregate functions
190
+ - `order_by()` / `orderBy()` / `sort()` - Sort rows
191
+ - `limit()` - Limit number of rows
192
+ - `distinct()` - Remove duplicate rows
193
+ - `withColumn()` - Add or rename columns
194
+ - `pivot()` - Pivot operations
195
+ - `explode()` - Explode array/JSON columns
196
+
197
+ ### Column Expressions
198
+ - **Arithmetic**: `+`, `-`, `*`, `/`, `%`
199
+ - **Comparisons**: `==`, `!=`, `<`, `>`, `<=`, `>=`
200
+ - **Boolean**: `&`, `|`, `~`
201
+ - **Functions**: 130+ functions including mathematical, string, date/time, aggregate, window, array, JSON, and utility functions
202
+ - **Window Functions**: `over()`, `partition_by()`, `order_by()` - Full PySpark compatibility
203
+
204
+ 📚 **[See Expressions Guide →](examples/06_expressions.py)**
205
+
206
+ ### Supported SQL Dialects
207
+ - ✅ **SQLite** - Full support
208
+ - ✅ **PostgreSQL** - Full support with dialect-specific optimizations
209
+ - ✅ **MySQL** - Full support with dialect-specific optimizations
210
+ - ✅ **DuckDB** - Full support with PostgreSQL-compatible optimizations
211
+ - ✅ **Other SQLAlchemy-supported databases** - ANSI SQL fallback
212
+
213
+ ## 🧪 Development
214
+
215
+ ### Setup
216
+
217
+ ```bash
218
+ # Clone the repository
219
+ git clone https://github.com/eddiethedean/moltres.git
220
+ cd moltres
221
+
222
+ # Install in development mode
223
+ pip install -e ".[dev]"
224
+
225
+ # Install pre-commit hooks
226
+ pre-commit install
227
+ ```
228
+
229
+ ### Running Tests
230
+
231
+ ```bash
232
+ # Run all tests
233
+ pytest
234
+
235
+ # Run tests in parallel
236
+ pytest -n 9
237
+
238
+ # Run with coverage
239
+ pytest --cov=src/moltres --cov-report=html
240
+ ```
241
+
242
+ ### Code Quality
243
+
244
+ ```bash
245
+ # Linting
246
+ ruff check .
247
+
248
+ # Formatting
249
+ ruff format .
250
+
251
+ # Type checking (strict mode enabled)
252
+ mypy src
253
+ ```
254
+
255
+ ### Pre-Commit CI Checks
256
+
257
+ ```bash
258
+ # Run all CI checks (linting, type checking, tests)
259
+ make ci-check
260
+
261
+ # Quick linting check only
262
+ make ci-check-lint
263
+ ```
264
+
265
+ ## 🤝 Contributing
266
+
267
+ Contributions are welcome! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for guidelines.
268
+
269
+ **Quick Start:**
270
+ 1. Fork the repository
271
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
272
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
273
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
274
+ 5. Open a Pull Request
275
+
276
+ **Before submitting:**
277
+ - Run tests: `pytest`
278
+ - Check code quality: `ruff check . && mypy src`
279
+ - Update documentation if needed
280
+
281
+ ## 👤 Author
282
+
283
+ **Odos Matthews**
284
+
285
+ - GitHub: [@eddiethedean](https://github.com/eddiethedean)
286
+ - Email: odosmatthews@gmail.com
287
+
288
+ ## 🙏 Acknowledgments
289
+
290
+ - Inspired by PySpark's DataFrame API style, but focused on SQL feature support rather than PySpark feature parity
291
+ - Built on SQLAlchemy for database connectivity and SQL compilation
292
+ - Thanks to all contributors and users
293
+
294
+ ## 📄 License
295
+
296
+ MIT License - see [LICENSE](LICENSE) file for details.
297
+
298
+ ---
299
+
300
+ <div align="center">
301
+
302
+ **Made with ❤️ for the Python data community**
303
+
304
+ [⬆ Back to Top](#moltres)
305
+
306
+ </div>