IQL 1.8.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
iql-1.8.36/.gitignore ADDED
@@ -0,0 +1,47 @@
1
+ *.py[cod]
2
+ __pycache__/
3
+ cache/
4
+ *.so
5
+ bin/
6
+ build/
7
+
8
+ develop-eggs/
9
+ *.egg
10
+ eggs/
11
+ *.egg-info/
12
+
13
+ dist/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ .installed.cfg
20
+
21
+ keys.ini
22
+
23
+ *.csv
24
+ *.xlsx
25
+ *.zip
26
+ *.pdf
27
+ server_user_id.txt
28
+
29
+ testdata
30
+ scratch*
31
+ .coverage
32
+ htmlcov
33
+ .env
34
+ .pytest_cache
35
+
36
+ *.code-workspace
37
+ *.parquet
38
+ coverage.xml
39
+ .vscode
40
+ .coverage.*
41
+ node_modules
42
+ *.json
43
+ *.json.gz
44
+ *.db
45
+ *.db.wal
46
+
47
+ .history
iql-1.8.36/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, Iqmo Corporation
4
+
5
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
+
7
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8
+
9
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10
+
11
+ Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
iql-1.8.36/PKG-INFO ADDED
@@ -0,0 +1,452 @@
1
+ Metadata-Version: 2.4
2
+ Name: IQL
3
+ Version: 1.8.36
4
+ Summary: I* Query Language
5
+ Project-URL: Homepage, https://github.com/iqmo-org/iql
6
+ Project-URL: Repository, https://github.com/iqmo-org/iql
7
+ Project-URL: Issues, https://github.com/iqmo-org/iql/issues
8
+ Author-email: Paul T <paul@iqmo.com>
9
+ Maintainer-email: Paul T <paul@iqmo.com>
10
+ License-Expression: BSD-3-Clause
11
+ License-File: LICENSE
12
+ Keywords: blpapi,bql,duckdb,iql,query language
13
+ Requires-Python: ~=3.11
14
+ Requires-Dist: cachetools>=6.1.0
15
+ Requires-Dist: duckdb>=1.2.2
16
+ Requires-Dist: pandas>=2.3.0
17
+ Requires-Dist: pyarrow>=20.0.0
18
+ Requires-Dist: sqlparse>=0.5.3
19
+ Description-Content-Type: text/markdown
20
+
21
+ [![PyPI Version](https://badge.fury.io/py/iql.svg)](https://pypi.python.org/pypi/iql)
22
+
23
+ <!--[![Anaconda-Server Badge](https://anaconda.org/conda-forge/iql/badges/version.svg)](https://anaconda.org/conda-forge/iql)-->
24
+
25
+ [![BuildRelease](https://github.com/iqmo-org/iql/actions/workflows/build_release.yml/badge.svg)](https://github.com/iqmo-org/iql/actions/workflows/build_release.yml)
26
+ [![Tests](https://github.com/iqmo-org/iql/actions/workflows/test_coverage.yml/badge.svg)](https://github.com/iqmo-org/iql/actions/workflows/test_coverage.yml)
27
+ [![Coverage badge](https://github.com/iqmo-org/iql/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/iqmo-org/iql/tree/python-coverage-comment-action-data)
28
+
29
+ # I\* Query Language (IQL)
30
+
31
+ IQL is a framework and a collection of Python-based extensions for financial data acquisition.
32
+
33
+ ## Disclaimers
34
+
35
+ THIS PROJECT IS NOT AFFILIATED WITH, SUPPORTED BY, ENDORSED BY OR CONNECTED TO ANY OF THE COMPANIES, PRODUCTS OR SERVICES BELOW.
36
+
37
+ # Installation
38
+
39
+ ```
40
+ # Install
41
+ %pip install iql --upgrade
42
+
43
+ # Load Magics to enable %iql and %%iql for VScode and Jupyter
44
+ %load_ext iql
45
+ ```
46
+
47
+ # Usage
48
+
49
+ Python API
50
+
51
+ ```
52
+ import iql
53
+ df1 = iql.execute(f"select * from ...")
54
+ ```
55
+
56
+ Cell Magic
57
+
58
+ ```
59
+ %%iql -o df1
60
+ select * from ...
61
+ ```
62
+
63
+ Line Magic
64
+
65
+ ```
66
+ df1 = %iql select * from ...
67
+ ```
68
+
69
+ # About IQL
70
+
71
+ IQL serves two purposes: a framework for adding lightweight extensions to data pipelines through declarative SQL queries, and a collection of useful extensions.
72
+
73
+ ## IQL: The Framework
74
+
75
+ IQL allows python functions to be registered and executed inline with SQL. These functions take parameters and can return DataFrames, Parquet Files, or CSVs.
76
+
77
+ ```
78
+ SELECT * FROM myextension(param='abc', param2='def')
79
+ ```
80
+
81
+ In this simple example, IQL extracts and preprocesses myextension. The SQL is rewritten to replace myextension, and the DataFrame is registered with the database. The exact mechanism depends on the database: for DuckDB (default database), the dataframes can be queried on the fly without loading explicitly to the database.
82
+
83
+ ## IQL: The Extensions
84
+
85
+ IQL works out of the box, allowing you to execute SQL statements over your dataframes and external data sources with no configuration.
86
+
87
+ - Bloomberg Query Language (BQL)
88
+
89
+ ```sql
90
+ select * from bql("get(px_last) for(['IBM US Equity']) with(dates=range(-90D, 0D), fill=prev)") order by value desc limit 3
91
+ ```
92
+
93
+ - Bloomberg BLPAPI
94
+ ```sql
95
+ select * from blpapi(fields=("PX_LAST","PX_OPEN", "BID"), historical=False, securities=("IBM US Equity","SPX Index"))
96
+ ```
97
+ - Pandas operations
98
+ ```sql
99
+ SELECT * FROM pandas(table=xyz, pivot=('col1', 'col2', 'values'))
100
+ ```
101
+
102
+ ## Why?
103
+
104
+ SQL is a powerful declarative language, capable of expressing complex data manipulations and enabling efficient optimized processing. Often, interaction with external data is required, making it difficult to build interactive systems leveraging declarative data structures. Combining SQL with a pre-processing framework to externalize data load within the context of a SQL query makes many workflows simpler.
105
+
106
+ Modern analytical databases, such as [DuckDB](https://github.com/duckdb), provide many powerful tools that allow more to be done within SQL, eliminating the back and forth required for data loading and manipulation. IQL is intended to extend the power of these platforms, without being tied to a single platform, through the use of pre-processed extensions.
107
+
108
+ ### Native Formats
109
+
110
+ After preprocessing of the IQL Extension queries, the database engine will execute a native query unmodified. This minimizes the dependency on specific databases and database versions.
111
+
112
+ The native SQL format of the database engine is supported: the SQL runs unmodified except for replacement of the IQL SubQueries.
113
+
114
+ The native SubQuery syntax is preserved. Bloomberg BQL queries run without modification. REST API calls can be called via URLs directly. etc.
115
+
116
+ ### Simplicity and Performance
117
+
118
+ Multiple transformations, aggregations and operations can be expressed in a single statement. This reduces the amount of code we need to write, eliminates long chains of Pandas operations, and often leads to significant performance increases.
119
+
120
+ IQL's default database is a highly efficient in-memory OLAP database called DuckDB, which requires zero configuration or administration. You may use a transient database per query, or create a long-lived database and reuse across queries. [DuckDB Performance vs Pandas](https://duckdb.org/2021/05/14/sql-on-pandas.html)
121
+
122
+ ### All in One Place and Extensible
123
+
124
+ You can query REST APIs as if they were database tables. You can add custom business logic to control how certain files are retrieved, cached or pre-processed.
125
+
126
+ IQL is portable across DB environments. DuckDB is shipped by default, but can be replaced by other databases.
127
+
128
+ ### How Does It Work
129
+
130
+ How does it work? IQL iterates over SQL statements (if multiple statements are used), and extracts IQL SubQueries (e.g., fred(...)). Each SubQuery is executed and stored (as a DataFrame or local file). The SQL query is modified to reference the results of the SubQuery instead of the SubQuery itself, and the database engine runs the modified SQL query.
131
+
132
+ For example, given the following query:
133
+
134
+ ```
135
+ %%iql
136
+ SELECT *
137
+ FROM fred('query1') as q1
138
+ JOIN fred('query2') as q2
139
+ ON q1.id=q2.id
140
+ ```
141
+
142
+ In pseudocode (this is logically but not literally what happens):
143
+
144
+ ```
145
+ # pseudocode
146
+ df_q1 = iql.execute("fred('query1')")
147
+ df_q2 = iql.execute("fred('query2')")
148
+
149
+ db.execute("SELECT * FROM df_q1 JOIN df_q2 on df_q1.id = df_q2.id")
150
+ ```
151
+
152
+ Or, using the %iql cell magic:
153
+
154
+ # Extensions:
155
+
156
+ - [Bloomberg BQL](BLOOMBERG_BQL_README.md)
157
+
158
+ - [Bloomberg BLPAPI](https://www.bloomberg.com/professional/support/api-library/)
159
+ - [Pandas](https://pandas.pydata.org/): Allows Pandas operations to be executed within the SQL statement. Not all Pandas operations are available.
160
+
161
+ See the examples/ folder for complete examples.
162
+
163
+ ## Syntax
164
+
165
+ IQL extensions are executed as functional subqueries. Each extension is registered with a unique name.
166
+
167
+ ```
168
+ SELECT *
169
+ FROM
170
+ bql("get (...) for (...)") q1
171
+ JOIN
172
+ bql("get (...) for (...)") q2
173
+ ON
174
+ q1.id = q2.id
175
+ ```
176
+
177
+ See the example notebooks for more interesting examples.
178
+
179
+ ## SQL Syntax
180
+
181
+ IQL uses a superset of the underlying database language. DuckDB is the default database, with a dialect similar/consistent with PostgreSQL's:
182
+ [DuckDB SQL Introduction](https://duckdb.org/docs/sql/introduction.html)
183
+ [DuckDB SQL Statements](https://duckdb.org/docs/sql/introduction)
184
+
185
+ ## Quoting Strings
186
+
187
+ Strings must be properly quoted and/or escaped, according to normal Python rules. The SubQuery requires a quoted string, so be careful to use different quote types for the entire SQL string and the SubQuery string.
188
+
189
+ Triple quotes are convenient, since SQL queries tend to be long and multi-line. Note the three levels of quotes: triple """, single " and single '.
190
+
191
+ ```
192
+ import iql
193
+
194
+ bql_str = "get (...) for ('XYZ')"
195
+ sql_str = f"""
196
+ -- This uses a Python f-string, which allows us to use the {bql_str} variable
197
+ SELECT *
198
+ FROM
199
+ -- bql() is an IQL extension. Note the quotes around the BQL statement.
200
+ -- if the BQL statement contains double quotes,
201
+ bql("{bql_str}")
202
+ """
203
+
204
+ iql.execute(sql_str)
205
+ ```
206
+
207
+ In Notebooks, this is a little simpler, since the outer quotes aren't needed:
208
+
209
+ ```
210
+ %%iql -o bql_df
211
+
212
+ SELECT * FROM bql("get(px_last) for ([`IBM US Equity`])")
213
+ ```
214
+
215
+ # Pandas Extension
216
+
217
+ The pandas options are available in every extension, but sometimes it's better to run after the data has been first populated in an earlier query.
218
+
219
+ The syntax is:
220
+
221
+ ```
222
+ iql.execute("""SELECT * FROM pandas(table=xyz, pivot=('col1', 'col2', 'values'))""")
223
+ ```
224
+
225
+ These operations may also be used in each of the extensions:
226
+
227
+ - fillna_pre='string': Before pivoting, replaces only in a single column: DataFrame["value"].fillna(val)
228
+ - dropna_pre=True | str | list[str]: Before pivoting, if True, DataFrame.dropna(). Else, DataFrame.dropna(subset=[value])
229
+ - pivot=(index,columns,values): DataFrame.pivot(index=index, columns=columns, values=values)
230
+ - fillna=val: DataFrame.fillna(val)
231
+ - dropna=True | str | list[str]: If True, DataFrame.dropna(). Else, DataFrame.dropna(subset=[value])
232
+
233
+ Note: While still in development, [DuckDB's Pivot and Unpivot](https://github.com/duckdb/duckdb/pull/6387) may change how we handle pivoting.
234
+
235
+ # Operations available to all IQL SubQueries:
236
+
237
+ # IQL extension for Bloomberg BQL
238
+
239
+ See [IQL Extension for Bloomberg BQL Readme](BLOOMBERG_BQL_README.md) for more information.
240
+
241
+ ## Troubleshooting: If you see an initialization failure, verify that BQL is available and working.
242
+
243
+ ```
244
+ import bql
245
+ bq = bql.Service()
246
+ bq.execute("get(name) for('IBM US Equity')")
247
+ ```
248
+
249
+ If this fails, you are probably not running in BQuant.
250
+
251
+ # Database
252
+
253
+ ## Database Lifecycle
254
+
255
+ ### Default: In-Memory Database for each iql.execute()
256
+
257
+ By default, a series of iql.execute() calls will create and close an in-memory DuckDB connection for each request.
258
+
259
+ ### Option 1: Keep Database Open
260
+
261
+ Use the iql default connection setting (in-memory only), but leave the connection open:
262
+
263
+ ```
264
+ con = iql.IQL.get_dbconnector().get_connection()
265
+ try:
266
+ iql.execute("CREATE TABLE abc as SELECT * FROM (values(1),(2),(3))", con=con)
267
+ df=iql.execute("SELECT * FROM abc", con=con)
268
+ display(df)
269
+ finally:
270
+ con.close()
271
+ ```
272
+
273
+ Multiple SQL statements may be separated by semicolons. The entire set will be run sequentially against a single database, so side effects will be maintained.
274
+
275
+ ### Option 2: Create Database Externally
276
+
277
+ With this method, you can use a file-based persistent database along with other connectivity options.
278
+
279
+ Or, create a DuckDB Connection [duckdb.connect()](https://duckdb.org/docs/api/python/overview), such as for a file-based persistent database.
280
+
281
+ ```
282
+ df=iql.execute("SELECT * FROM abc", con=con)
283
+ ```
284
+
285
+ # FAQ
286
+
287
+ ## How can I simplify my SQL?
288
+
289
+ There are several approaches to using IQL SubQueries:
290
+
291
+ ### Inline
292
+
293
+ ```
294
+ SELECT fields
295
+ FROM table1
296
+ JOIN table2
297
+ on table1.id=table2.id
298
+ JOIN bql(".....") as k3
299
+ on k3.dates < table2.dates
300
+ WHERE k3.something is true
301
+ ```
302
+
303
+ ### Common Table Expressions (WITH clause)
304
+
305
+ CTEs are necessary when the same subquery will be transformed multiple times within a single query. CTEs are also helpful syntactic sugar: the declaration of a subquery is separate from its use, making the SELECT statement simpler.
306
+
307
+ ```
308
+ WITH k3 as (select * from bql(".....") WHERE something is true)
309
+ SELECT fields
310
+ FROM table1
311
+ JOIN table2
312
+ on table1.id=table2.id
313
+ JOIN k3
314
+ on k3.dates < table2.dates
315
+ ```
316
+
317
+ ### Storing the Data in Tables
318
+
319
+ When data will be accessed by multiple queries, store the data first via CREATE TABLE / CREATE TEMP TABLE instead of running the same IQL SubQueries multiple times. IQL's caching is helpful, if enabled, but storing the data in tables provides more flexibility.
320
+
321
+ ```
322
+ CREATE [TEMP] TABLE k3 as (SELECT * FROM bql(".....") WHERE something is true);
323
+ SELECT fields
324
+ FROM table1 JOIN table2
325
+ on table1.id=table2.id
326
+ JOIN k3
327
+ on k3.dates < table2.dates
328
+ ```
329
+
330
+ ## Why DuckDB as the default?
331
+
332
+ We chose [DuckDB](https://duckdb.org/) as the default database module for a few reasons:
333
+
334
+ - DuckDB is awesome and [fast](https://duckdb.org/2021/05/14/sql-on-pandas.html), with vectorized columnar operations.
335
+ - It runs with no setup
336
+ - It runs fully in memory and has support for a variety of data sources
337
+ - DuckDB's SQL language is standard
338
+ - DuckDB has extensive support for the Python ecosystem, including Pandas, PyArrow and Polars
339
+
340
+ ## Why not a DuckDB Extension?
341
+
342
+ We didn't implement IQL as an extension for a few reasons:
343
+
344
+ - Portability: DuckDB is great, but it's not the only game in town. Engines like SnowFlake are important.
345
+ - Speed of development: Native Python is easy to develop, easy to debug, and convenient to modify and extend.
346
+ - Performance: In our workflows, there was little performance to be gained. Runtime was dominated by external data transfer.
347
+
348
+ We may still implement DuckDB extension(s) to eliminate the extra preprocess/rewrite step.
349
+
350
+ ## Other Database Engines
351
+
352
+ Any database can be supported by implementing a database module. IQL was written in a syntax neutral method. The key step that's dependent on the database engine is registering (or loading) the SubQuery dataframes to the database engine prior to executing the queries.
353
+
354
+ Modules could be added to support other engines and formats:
355
+
356
+ - [SQLDF](https://pypi.org/project/sqldf/) and [PandaSQL](https://pypi.org/project/pandasql/): Local-only databases that can connect to in-memory Pandas dataframes
357
+ - PyArrow (w/ PySpark/Dask): SubQuery dataframes would be loaded via [pyarrow.Table.from_pandas()](https://arrow.apache.org/docs/python/pandas.html)
358
+ - SnowFlake: During registration step, the Pandas dataframes need to be loaded via the [SnowFlake Pandas Connector](https://docs.snowflake.com/en/user-guide/python-connector-pandas)
359
+ - Other Pandas-centric engines, such as SQLDF and PandaSQL
360
+
361
+ ## Design Principles
362
+
363
+ - Extensibility: Extensions and Database Connectors can be easily modified, replaced, or extended.
364
+ - KISS: Keep it simple. Don't add complexity.
365
+ - REST APIs, such as FRED: Use the complete URL, rather than building yet-another-Python-API
366
+ - Bloomberg BQL: Use native BQL queries without modification
367
+ - Minimal dependencies: Extensions are loaded on-demand. Unused dependencies are not required.
368
+
369
+ # Footnotes
370
+
371
+ ## Useful DuckDB Features
372
+
373
+ ### CTEs
374
+
375
+ ```
376
+ import iql
377
+ df = iql.execute("""
378
+ WITH c AS keyword("..."),
379
+ idx AS keyword("...")
380
+ SELECT c.*, idx.*
381
+ FROM c
382
+ JOIN idx
383
+ ON c.idx=idx.id""")
384
+ display(df)
385
+ ```
386
+
387
+ ### Accessing Global DataFrames:
388
+
389
+ ```
390
+ import iql
391
+ import pandas as pd
392
+
393
+ fun = pd.DataFrame([{'id': 'Someone', 'fun_level': 'High'}])
394
+ iql.execute("""SELECT * FROM fun""")
395
+ ```
396
+
397
+ ### Copy (query) to 'file'
398
+
399
+ ```
400
+ import iql
401
+ iql.execute("""COPY (query) TO 'somefile.parquet'""")
402
+ ```
403
+
404
+ ## Copy to Parquet
405
+
406
+ - Copy to parquet:
407
+ https://duckdb.org/docs/guides/import/parquet_export.html#:~:text=To%20export%20the%20data%20from,exported%20to%20a%20Parquet%20file.
408
+
409
+ # Futures and Ideas
410
+
411
+ ## SQL ReWrite
412
+
413
+ Instead of modifying the SQL in a single step, we could introduce an intermediate statement that has the same logical flow as the code today. This would make it easier to debug, allowing the user to view and debug each step.
414
+
415
+ ```
416
+ SELECT * FROM fred() a JOIN fred() b on a.id=b.id
417
+ ```
418
+
419
+ could be transformed first into:
420
+
421
+ ```
422
+ a=fred();
423
+ b=fred();
424
+ SELECT * FROM a JOIN b
425
+ ```
426
+
427
+ One decision needed here is how to express the first two statements: would we use a CREATE TEMP TABLE or COPY TO to store the SubQuery results, or do we introduce something like CREATE DF.
428
+
429
+ ## Simplifying Parsing
430
+
431
+ We didn't implement a grammar, because each grammar is very platform dependent. Each database has its own product-specific grammar.
432
+
433
+ The current IQL implementation first parses the SQL to extract the named functions, using the sqlparse library, then extracts the IQL subqueries by their named keywords. The SubQueries are then parsed via an AST to extract the parameters and values. Any parsing introduces risks and fragility:
434
+
435
+ - It's possible that sqlparse will fail to parse certain database specific language features. We haven't encountered this yet, but it's something we're thinking about
436
+ - It's also possible that our extraction will fail to recognize proper subqueries, due to how sqlparse extracts the tokens. The code here is not as robust as we'd like, and more testing is needed.
437
+
438
+ There are a few ways to improve this:
439
+
440
+ - Direct string extraction: identify subquery() blocks and extract them directly as strings, rather than parsing the entire SQL file. This would have to properly account for commenting, quoting, and nesting.
441
+ - DuckDB (or whatever platform) extensions: use a lightweight extension to allow the database to externally call the IQL layer, rather than having IQL act as an intermediate step. Or, use a table function, which is not yet supported in SQL, only in relational API.
442
+
443
+ ## Caching
444
+
445
+ The default in-memory cache will grow unbounded within each kernel session. The expiration is only used to invalidate data, but expired results are not evicted from memory if not accessed.
446
+
447
+ IQL doesn't provide a default implementation, but the QueryCacheBase is intended to be extended to provide file or S3 caching for large, expensive operations along with more sophisticated caching rules.
448
+
449
+ # Footer
450
+
451
+ Copyright (C) 2024, IQMO Corporation [info@iqmo.com]
452
+ All Rights Reserved
iql-1.8.36/README.md ADDED
@@ -0,0 +1,432 @@
1
+ [![PyPI Version](https://badge.fury.io/py/iql.svg)](https://pypi.python.org/pypi/iql)
2
+
3
+ <!--[![Anaconda-Server Badge](https://anaconda.org/conda-forge/iql/badges/version.svg)](https://anaconda.org/conda-forge/iql)-->
4
+
5
+ [![BuildRelease](https://github.com/iqmo-org/iql/actions/workflows/build_release.yml/badge.svg)](https://github.com/iqmo-org/iql/actions/workflows/build_release.yml)
6
+ [![Tests](https://github.com/iqmo-org/iql/actions/workflows/test_coverage.yml/badge.svg)](https://github.com/iqmo-org/iql/actions/workflows/test_coverage.yml)
7
+ [![Coverage badge](https://github.com/iqmo-org/iql/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/iqmo-org/iql/tree/python-coverage-comment-action-data)
8
+
9
+ # I\* Query Language (IQL)
10
+
11
+ IQL is a framework and a collection of Python-based extensions for financial data acquisition.
12
+
13
+ ## Disclaimers
14
+
15
+ THIS PROJECT IS NOT AFFILIATED WITH, SUPPORTED BY, ENDORSED BY OR CONNECTED TO ANY OF THE COMPANIES, PRODUCTS OR SERVICES BELOW.
16
+
17
+ # Installation
18
+
19
+ ```
20
+ # Install
21
+ %pip install iql --upgrade
22
+
23
+ # Load Magics to enable %iql and %%iql for VScode and Jupyter
24
+ %load_ext iql
25
+ ```
26
+
27
+ # Usage
28
+
29
+ Python API
30
+
31
+ ```
32
+ import iql
33
+ df1 = iql.execute(f"select * from ...")
34
+ ```
35
+
36
+ Cell Magic
37
+
38
+ ```
39
+ %%iql -o df1
40
+ select * from ...
41
+ ```
42
+
43
+ Line Magic
44
+
45
+ ```
46
+ df1 = %iql select * from ...
47
+ ```
48
+
49
+ # About IQL
50
+
51
+ IQL serves two purposes: a framework for adding lightweight extensions to data pipelines through declarative SQL queries, and a collection of useful extensions.
52
+
53
+ ## IQL: The Framework
54
+
55
+ IQL allows python functions to be registered and executed inline with SQL. These functions take parameters and can return DataFrames, Parquet Files, or CSVs.
56
+
57
+ ```
58
+ SELECT * FROM myextension(param='abc', param2='def')
59
+ ```
60
+
61
+ In this simple example, IQL extracts and preprocesses myextension. The SQL is rewritten to replace myextension, and the DataFrame is registered with the database. The exact mechanism depends on the database: for DuckDB (default database), the dataframes can be queried on the fly without loading explicitly to the database.
62
+
63
+ ## IQL: The Extensions
64
+
65
+ IQL works out of the box, allowing you to execute SQL statements over your dataframes and external data sources with no configuration.
66
+
67
+ - Bloomberg Query Language (BQL)
68
+
69
+ ```sql
70
+ select * from bql("get(px_last) for(['IBM US Equity']) with(dates=range(-90D, 0D), fill=prev)") order by value desc limit 3
71
+ ```
72
+
73
+ - Bloomberg BLPAPI
74
+ ```sql
75
+ select * from blpapi(fields=("PX_LAST","PX_OPEN", "BID"), historical=False, securities=("IBM US Equity","SPX Index"))
76
+ ```
77
+ - Pandas operations
78
+ ```sql
79
+ SELECT * FROM pandas(table=xyz, pivot=('col1', 'col2', 'values'))
80
+ ```
81
+
82
+ ## Why?
83
+
84
+ SQL is a powerful declarative language, capable of expressing complex data manipulations and enabling efficient optimized processing. Often, interaction with external data is required, making it difficult to build interactive systems leveraging declarative data structures. Combining SQL with a pre-processing framework to externalize data load within the context of a SQL query makes many workflows simpler.
85
+
86
+ Modern analytical databases, such as [DuckDB](https://github.com/duckdb), provide many powerful tools that allow more to be done within SQL, eliminating the back and forth required for data loading and manipulation. IQL is intended to extend the power of these platforms, without being tied to a single platform, through the use of pre-processed extensions.
87
+
88
+ ### Native Formats
89
+
90
+ After preprocessing of the IQL Extension queries, the database engine will execute a native query unmodified. This minimizes the dependency on specific databases and database versions.
91
+
92
+ The native SQL format of the database engine is supported: the SQL runs unmodified except for replacement of the IQL SubQueries.
93
+
94
+ The native SubQuery syntax is preserved. Bloomberg BQL queries run without modification. REST API calls can be called via URLs directly. etc.
95
+
96
+ ### Simplicity and Performance
97
+
98
+ Multiple transformations, aggregations and operations can be expressed in a single statement. This reduces the amount of code we need to write, eliminates long chains of Pandas operations, and often leads to significant performance increases.
99
+
100
+ IQL's default database is a highly efficient in-memory OLAP database called DuckDB, which requires zero configuration or administration. You may use a transient database per query, or create a long-lived database and reuse across queries. [DuckDB Performance vs Pandas](https://duckdb.org/2021/05/14/sql-on-pandas.html)
101
+
102
+ ### All in One Place and Extensible
103
+
104
+ You can query REST APIs as if they were database tables. You can add custom business logic to control how certain files are retrieved, cached or pre-processed.
105
+
106
+ IQL is portable across DB environments. DuckDB is shipped by default, but can be replaced by other databases.
107
+
108
+ ### How Does It Work
109
+
110
+ How does it work? IQL iterates over SQL statements (if multiple statements are used), and extracts IQL SubQueries (e.g., fred(...)). Each SubQuery is executed and stored (as a DataFrame or local file). The SQL query is modified to reference the results of the SubQuery instead of the SubQuery itself, and the database engine runs the modified SQL query.
111
+
112
+ For example, given the following query:
113
+
114
+ ```
115
+ %%iql
116
+ SELECT *
117
+ FROM fred('query1') as q1
118
+ JOIN fred('query2') as q2
119
+ ON q1.id=q2.id
120
+ ```
121
+
122
+ In pseudocode (this is logically but not literally what happens):
123
+
124
+ ```
125
+ # pseudocode
126
+ df_q1 = iql.execute("fred('query1')")
127
+ df_q2 = iql.execute("fred('query2')")
128
+
129
+ db.execute("SELECT * FROM df_q1 JOIN df_q2 on df_q1.id = df_q2.id")
130
+ ```
131
+
132
+ Or, using the %iql cell magic:
133
+
134
+ # Extensions:
135
+
136
+ - [Bloomberg BQL](BLOOMBERG_BQL_README.md)
137
+
138
+ - [Bloomberg BLPAPI](https://www.bloomberg.com/professional/support/api-library/)
139
+ - [Pandas](https://pandas.pydata.org/): Allows Pandas operations to be executed within the SQL statement. Not all Pandas operations are available.
140
+
141
+ See the examples/ folder for complete examples.
142
+
143
+ ## Syntax
144
+
145
+ IQL extensions are executed as functional subqueries. Each extension is registered with a unique name.
146
+
147
+ ```
148
+ SELECT *
149
+ FROM
150
+ bql("get (...) for (...)") q1
151
+ JOIN
152
+ bql("get (...) for (...)") q2
153
+ ON
154
+ q1.id = q2.id
155
+ ```
156
+
157
+ See the example notebooks for more interesting examples.
158
+
159
+ ## SQL Syntax
160
+
161
+ IQL uses a superset of the underlying database language. DuckDB is the default database, with a dialect similar/consistent with PostgreSQL's:
162
+ [DuckDB SQL Introduction](https://duckdb.org/docs/sql/introduction.html)
163
+ [DuckDB SQL Statements](https://duckdb.org/docs/sql/introduction)
164
+
165
+ ## Quoting Strings
166
+
167
+ Strings must be properly quoted and/or escaped, according to normal Python rules. The SubQuery requires a quoted string, so be careful to use different quote types for the entire SQL string and the SubQuery string.
168
+
169
+ Triple quotes are convenient, since SQL queries tend to be long and multi-line. Note the three levels of quotes: triple """, single " and single '.
170
+
171
+ ```
172
+ import iql
173
+
174
+ bql_str = "get (...) for ('XYZ')"
175
+ sql_str = f"""
176
+ -- This uses a Python f-string, which allows us to use the {bql_str} variable
177
+ SELECT *
178
+ FROM
179
+ -- bql() is an IQL extension. Note the quotes around the BQL statement.
180
+ -- if the BQL statement contains double quotes,
181
+ bql("{bql_str}")
182
+ """
183
+
184
+ iql.execute(sql_str)
185
+ ```
186
+
187
+ In Notebooks, this is a little simpler, since the outer quotes aren't needed:
188
+
189
+ ```
190
+ %%iql -o bql_df
191
+
192
+ SELECT * FROM bql("get(px_last) for ([`IBM US Equity`])")
193
+ ```
194
+
195
+ # Pandas Extension
196
+
197
+ The pandas options are available in every extension, but sometimes it's better to run after the data has been first populated in an earlier query.
198
+
199
+ The syntax is:
200
+
201
+ ```
202
+ iql.execute("""SELECT * FROM pandas(table=xyz, pivot=('col1', 'col2', 'values'))""")
203
+ ```
204
+
205
+ These operations may also be used in each of the extensions:
206
+
207
+ - fillna_pre='string': Before pivoting, replaces only in a single column: DataFrame["value"].fillna(val)
208
+ - dropna_pre=True | str | list[str]: Before pivoting, if True, DataFrame.dropna(). Else, DataFrame.dropna(subset=[value])
209
+ - pivot=(index,columns,values): DataFrame.pivot(index=index, columns=columns, values=values)
210
+ - fillna=val: DataFrame.fillna(val)
211
+ - dropna=True | str | list[str]: If True, DataFrame.dropna(). Else, DataFrame.dropna(subset=[value])
212
+
213
+ Note: While still in development, [DuckDB's Pivot and Unpivot](https://github.com/duckdb/duckdb/pull/6387) may change how we handle pivoting.
214
+
215
+ # Operations available to all IQL SubQueries:
216
+
217
+ # IQL extension for Bloomberg BQL
218
+
219
+ See [IQL Extension for Bloomberg BQL Readme](BLOOMBERG_BQL_README.md) for more information.
220
+
221
+ ## Troubleshooting: If you see an initialization failure, verify that BQL is available and working.
222
+
223
+ ```
224
+ import bql
225
+ bq = bql.Service()
226
+ bq.execute("get(name) for('IBM US Equity')")
227
+ ```
228
+
229
+ If this fails, you are probably not running in BQuant.
230
+
231
+ # Database
232
+
233
+ ## Database Lifecycle
234
+
235
+ ### Default: In-Memory Database for each iql.execute()
236
+
237
+ By default, a series of iql.execute() calls will create and close an in-memory DuckDB connection for each request.
238
+
239
+ ### Option 1: Keep Database Open
240
+
241
+ Use the iql default connection setting (in-memory only), but leave the connection open:
242
+
243
+ ```
244
+ con = iql.IQL.get_dbconnector().get_connection()
245
+ try:
246
+ iql.execute("CREATE TABLE abc as SELECT * FROM (values(1),(2),(3))", con=con)
247
+ df=iql.execute("SELECT * FROM abc", con=con)
248
+ display(df)
249
+ finally:
250
+ con.close()
251
+ ```
252
+
253
+ SQL statements may be separated by semicolons. The entire set will be run sequentially against a single database, so side effects will be maintained.
254
+
255
+ ### Option 2: Create Database Externally
256
+
257
+ With this method, you can use a file-based persistent database along with other connectivity options.
258
+
259
+ Create a DuckDB connection with [duckdb.connect()](https://duckdb.org/docs/api/python/overview), such as for a file-based persistent database, and pass it to iql.execute() via the con parameter:
260
+
261
+ ```
262
+ df=iql.execute("SELECT * FROM abc", con=con)
263
+ ```
264
+
265
+ # FAQ
266
+
267
+ ## How can I simplify my SQL?
268
+
269
+ There are several approaches to using IQL SubQueries:
270
+
271
+ ### Inline
272
+
273
+ ```
274
+ SELECT fields
275
+ FROM table1
276
+ JOIN table2
277
+ on table1.id=table2.id
278
+ JOIN bql(".....") as k3
279
+ on k3.dates < table2.dates
280
+ WHERE k3.something is true
281
+ ```
282
+
283
+ ### Common Table Expressions (WITH clause)
284
+
285
+ CTEs are necessary when the same subquery will be transformed multiple times within a single query. CTEs are also helpful syntactic sugar: the declaration of a subquery is separate from its use, making the SELECT statement simpler.
286
+
287
+ ```
288
+ WITH k3 as (select * from bql(".....") WHERE something is true)
289
+ SELECT fields
290
+ FROM table1
291
+ JOIN table2
292
+ on table1.id=table2.id
293
+ JOIN k3
294
+ on k3.dates < table2.dates
295
+ ```
296
+
297
+ ### Storing the Data in Tables
298
+
299
+ When data will be accessed by multiple queries, store the data first via CREATE TABLE / CREATE TEMP TABLE instead of running the same IQL SubQueries multiple times. IQL's caching is helpful, if enabled, but storing the data in tables provides more flexibility.
300
+
301
+ ```
302
+ CREATE [TEMP] TABLE k3 as (SELECT * FROM bql(".....") WHERE something is true);
303
+ SELECT fields
304
+ FROM table1 JOIN table2
305
+ on table1.id=table2.id
306
+ JOIN k3
307
+ on k3.dates < table2.dates
308
+ ```
309
+
310
+ ## Why DuckDB as the default?
311
+
312
+ We chose [DuckDB](https://duckdb.org/) as the default database module for a few reasons:
313
+
314
+ - DuckDB is awesome and [fast](https://duckdb.org/2021/05/14/sql-on-pandas.html), with vectorized columnar operations.
315
+ - It runs with no setup
316
+ - It runs fully in memory and has support for a variety of data sources
317
+ - DuckDB's SQL language is standard
318
+ - DuckDB has extensive support for the Python ecosystem, including Pandas, PyArrow and Polars
319
+
320
+ ## Why not a DuckDB Extension?
321
+
322
+ We didn't implement IQL as an extension for a few reasons:
323
+
324
+ - Portability: DuckDB is great, but it's not the only game in town. Engines like SnowFlake are important.
325
+ - Speed of development: Native Python is easy to develop, easy to debug, and convenient to modify and extend.
326
+ - Performance: In our workflows, there was little performance to be gained. Runtime was dominated by external data transfer.
327
+
328
+ We may still implement DuckDB extension(s) to eliminate the extra preprocess/rewrite step.
329
+
330
+ ## Other Database Engines
331
+
332
+ Any database can be supported by implementing a database module. IQL was written in a syntax-neutral way. The key step that's dependent on the database engine is registering (or loading) the SubQuery dataframes to the database engine prior to executing the queries.
333
+
334
+ Modules could be added to support other engines and formats:
335
+
336
+ - [SQLDF](https://pypi.org/project/sqldf/) and [PandaSQL](https://pypi.org/project/pandasql/): Local-only databases that can connect to in-memory Pandas dataframes
337
+ - PyArrow (w/ PySpark/Dask): SubQuery dataframes would be loaded via [pyarrow.Table.from_pandas()](https://arrow.apache.org/docs/python/pandas.html)
338
+ - SnowFlake: During registration step, the Pandas dataframes need to be loaded via the [SnowFlake Pandas Connector](https://docs.snowflake.com/en/user-guide/python-connector-pandas)
339
+ - Other Pandas-centric engines, such as SQLDF and PandaSQL
340
+
341
+ ## Design Principles
342
+
343
+ - Extensibility: Extensions and Database Connectors can be easily modified, replaced, or extended.
344
+ - KISS: Keep it simple. Don't add complexity.
345
+ - REST APIs, such as FRED: Use the complete URL, rather than building yet-another-Python-API
346
+ - Bloomberg BQL: Use native BQL queries without modification
347
+ - Minimal dependencies: Extensions are loaded on-demand. Unused dependencies are not required.
348
+
349
+ # Footnotes
350
+
351
+ ## Useful DuckDB Features
352
+
353
+ ### CTEs
354
+
355
+ ```
356
+ import iql
357
+ df = iql.execute("""
358
+ WITH c AS keyword("..."),
359
+ idx AS keyword("...")
360
+ SELECT c.*, idx.*
361
+ FROM c
362
+ JOIN idx
363
+ ON c.idx=idx.id""")
364
+ display(df)
365
+ ```
366
+
367
+ ### Accessing Global DataFrames:
368
+
369
+ ```
370
+ import iql
371
+ import pandas as pd
372
+
373
+ fun = pd.DataFrame([{'id': 'Someone', 'fun_level': 'High'}])
374
+ iql.execute("""SELECT * FROM fun""")
375
+ ```
376
+
377
+ ### Copy (query) to 'file'
378
+
379
+ ```
380
+ import iql
381
+ iql.execute("""COPY (query) TO 'somefile.parquet'""")
382
+ ```
383
+
384
+ ## Copy to Parquet
385
+
386
+ - Copy to parquet:
387
+ https://duckdb.org/docs/guides/import/parquet_export.html#:~:text=To%20export%20the%20data%20from,exported%20to%20a%20Parquet%20file.
388
+
389
+ # Futures and Ideas
390
+
391
+ ## SQL ReWrite
392
+
393
+ Instead of modifying the SQL in a single step, we could introduce an intermediate statement that has the same logical flow as the code today. This would make it easier to debug, allowing the user to view and debug each step.
394
+
395
+ ```
396
+ SELECT * FROM fred() a JOIN fred() b on a.id=b.id
397
+ ```
398
+
399
+ could be transformed first into:
400
+
401
+ ```
402
+ a=fred();
403
+ b=fred();
404
+ SELECT * FROM a JOIN b
405
+ ```
406
+
407
+ One decision needed here is how to express the first two statements: would we use a CREATE TEMP TABLE or COPY TO to store the SubQuery results, or do we introduce something like CREATE DF.
408
+
409
+ ## Simplifying Parsing
410
+
411
+ We didn't implement a grammar, because each grammar is very platform dependent. Each database has its own product-specific grammar.
412
+
413
+ The current IQL implementation first parses the SQL to extract the named functions, using the sqlparse library, then extracts the IQL subqueries by their named keywords. The SubQueries are then parsed via an AST to extract the parameters and values. Any parsing introduces risks and fragility:
414
+
415
+ - It's possible that sqlparse will fail to parse certain database specific language features. We haven't encountered this yet, but it's something we're thinking about
416
+ - It's also possible that our extraction will fail to recognize proper subqueries, due to how sqlparse extracts the tokens. The code here is not as robust as we'd like, and more testing is needed.
417
+
418
+ There are a few ways to improve this:
419
+
420
+ - Direct string extraction: identify subquery() blocks and extract them directly as strings, rather than parsing the entire SQL file. This would have to properly account for commenting, quoting, and nesting.
421
+ - DuckDB (or whatever platform) extensions: use a lightweight extension to allow the database to externally call the IQL layer, rather than having IQL act as an intermediate step. Or, use a table function, which is not yet supported in SQL, only in the relational API.
422
+
423
+ ## Caching
424
+
425
+ The default in-memory cache will grow unbounded within each kernel session. The expiration is only used to invalidate data; expired results are not evicted from memory if they are not accessed.
426
+
427
+ IQL doesn't provide a default implementation, but the QueryCacheBase is intended to be extended to provide file or S3 caching for large, expensive operations along with more sophisticated caching rules.
428
+
429
+ # Footer
430
+
431
+ Copyright (C) 2024, IQMO Corporation [info@iqmo.com]
432
+ All Rights Reserved
@@ -0,0 +1 @@
1
+ __version__ = "1.8.36"
@@ -0,0 +1,110 @@
1
+ [project]
2
+ name = "IQL"
3
+ description = "I* Query Language"
4
+ authors = [{ name = "Paul T", email = "paul@iqmo.com" }]
5
+ requires-python = "~=3.11"
6
+ readme = "README.md"
7
+ license = "BSD-3-Clause"
8
+ dynamic = ["version"]
9
+ maintainers = [{ name = "Paul T", email = "paul@iqmo.com" }]
10
+ keywords = [
11
+ "iql",
12
+ "query language",
13
+ "duckdb",
14
+ "bql",
15
+ "blpapi",
16
+ ]
17
+ dependencies = [
18
+ "cachetools>=6.1.0",
19
+ "duckdb>=1.2.2",
20
+ "pandas>=2.3.0",
21
+ "pyarrow>=20.0.0",
22
+ "sqlparse>=0.5.3",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://github.com/iqmo-org/iql"
27
+ Repository = "https://github.com/iqmo-org/iql"
28
+ Issues = "https://github.com/iqmo-org/iql/issues"
29
+
30
+ [dependency-groups]
31
+ dev = [
32
+ "pytest>=8.2.1,<9",
33
+ "pytest-cov>=5,<7",
34
+ "pytest-xdist>=3.6.1,<4",
35
+ "pre-commit>=3.7.1,<5.0.0",
36
+ "ipython>=8.31,<10.0",
37
+ "ipykernel>=6.29.5,<7",
38
+ "sqlparse>=0.5.3",
39
+ "jinja2>=3.1.6",
40
+ ]
41
+
42
+ [build-system]
43
+ requires = ["hatchling", "uv-dynamic-versioning"]
44
+ build-backend = "hatchling.build"
45
+
46
+ [tool.setuptools.dynamic]
47
+ version = {attr = "iql._version.__version__"}
48
+
49
+ [tool.hatch.version]
50
+ source = "uv-dynamic-versioning"
51
+
52
+ [tool.hatch.build.hooks.version]
53
+ path = "iql._version.py"
54
+ template = '''
55
+ __version__ = "{version}"
56
+ '''
57
+
58
+ [tool.setuptools]
59
+ packages = ["iql"]
60
+
61
+ [tool.hatch.build.targets.sdist]
62
+ include = ["./iql"]
63
+
64
+ [tool.hatch.build.targets.wheel]
65
+ include = ["./iql"]
66
+
67
+ [tool.hatch.build.targets.wheel.sources]
68
+ "./iql" = "iql"
69
+
70
+ [tool.ruff]
71
+ line-length = 120
72
+ exclude = [
73
+ ".git",
74
+ "__pycache__",
75
+ "build",
76
+ "dist",
77
+ "docs/source/conf.py",
78
+ "tests",
79
+ "tests.ipynb"]
80
+ extend-include = ["*.ipynb"]
81
+
82
+ [tool.ruff.lint]
83
+ # pyflakes, pycodestyle, isort
84
+ select = ["F", "E", "W", "S", "B", "G", "N", "I001", "T", "PD", "C90"]
85
+ ignore = [
86
+ #E203,
87
+ #"W503",
88
+ #"W293",
89
+ "W291",
90
+ "E501",
91
+ "S608",
92
+ "PD901"]
93
+
94
+ [tool.pyright]
95
+ typeCheckingMode = "basic"
96
+ reportGeneralTypeIssues = false
97
+ reportPrivateImportUsage = false
98
+ reportAttributeAccessIssue = false
99
+ exclude = ["tests/**"]
100
+
101
+ [tool.poetry-dynamic-versioning]
102
+ enable = true
103
+ vcs = "git"
104
+ style = "semver"
105
+
106
+ [tool.pytest.ini_options]
107
+ markers = [
108
+ "bqnt: Tests that require BQNT environment",
109
+ "blpapi: Tests that require BLPAPI/Terminal session"
110
+ ]