almagest 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. almagest-2.2.1/.gitignore +138 -0
  2. almagest-2.2.1/LICENSE +18 -0
  3. almagest-2.2.1/PKG-INFO +398 -0
  4. almagest-2.2.1/README.md +343 -0
  5. almagest-2.2.1/almagest/abstract_data_exporter.py +153 -0
  6. almagest-2.2.1/almagest/client_helper.py +60 -0
  7. almagest-2.2.1/almagest/data_normalizer.py +350 -0
  8. almagest-2.2.1/almagest/dsl_query/__init__.py +13 -0
  9. almagest-2.2.1/almagest/dsl_query/fluent_dsl_client.py +25 -0
  10. almagest-2.2.1/almagest/dsl_query/mixins/agg.py +141 -0
  11. almagest-2.2.1/almagest/dsl_query/mixins/base_mixin.py +163 -0
  12. almagest-2.2.1/almagest/dsl_query/mixins/date.py +149 -0
  13. almagest-2.2.1/almagest/dsl_query/mixins/match.py +150 -0
  14. almagest-2.2.1/almagest/dsl_query/mixins/pager.py +118 -0
  15. almagest-2.2.1/almagest/templates/.gitkeep +0 -0
  16. almagest-2.2.1/almagest/templates/celestrak_catalog_template.yml +113 -0
  17. almagest-2.2.1/almagest/templates/dnd_catalog_historical.yml +187 -0
  18. almagest-2.2.1/almagest/templates/dnd_catalog_snapshot.yml +187 -0
  19. almagest-2.2.1/almagest/templates/dnd_deltav_protect.yml +103 -0
  20. almagest-2.2.1/almagest/templates/dnd_deltav_public.yml +103 -0
  21. almagest-2.2.1/almagest/templates/dnd_gcat_catalog.yml +207 -0
  22. almagest-2.2.1/almagest/templates/dnd_rotas.yml +423 -0
  23. almagest-2.2.1/almagest/util/__init__.py +0 -0
  24. almagest-2.2.1/almagest/util/logging/__init__.py +0 -0
  25. almagest-2.2.1/almagest/util/logging/simple_logger.py +155 -0
  26. almagest-2.2.1/almagest/util/requests/__init__.py +0 -0
  27. almagest-2.2.1/almagest/util/requests/requests_header_helper.py +287 -0
  28. almagest-2.2.1/almagest/util/requests/simple_session.py +80 -0
  29. almagest-2.2.1/almagest/util/template_generator.py +183 -0
  30. almagest-2.2.1/pyproject.toml +79 -0
@@ -0,0 +1,138 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ prod.env
106
+ .env.prod
107
+ .env
108
+ .venv
109
+ env/
110
+ venv/
111
+ ENV/
112
+ env.bak/
113
+ venv.bak/
114
+
115
+ # Spyder project settings
116
+ .spyderproject
117
+ .spyproject
118
+
119
+ # Rope project settings
120
+ .ropeproject
121
+
122
+ # mkdocs documentation
123
+ /site
124
+
125
+ # mypy
126
+ .mypy_cache/
127
+ .dmypy.json
128
+ dmypy.json
129
+
130
+ # Pyre type checker
131
+ .pyre/
132
+ .vscode/extensions.json
133
+ .vscode/launch.json
134
+ .vscode/project.json
135
+ .vscode/settings.json
136
+ .vscode/tasks.json
137
+
138
+ as_headers/
almagest-2.2.1/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ # GNU GENERAL PUBLIC LICENSE
2
+
3
+ Version 3, 29 June 2007
4
+
5
+ Copyright (C) [2024] [ICR]
6
+
7
+ This program is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
@@ -0,0 +1,398 @@
1
+ Metadata-Version: 2.4
2
+ Name: almagest
3
+ Version: 2.2.1
4
+ Summary: This module provides a robust, singleton-based client manager for connecting to an OpenSearch cluster.
5
+ Author-email: Greg Stewart <gstew77@gmail.com>
6
+ License: # GNU GENERAL PUBLIC LICENSE
7
+
8
+ Version 3, 29 June 2007
9
+
10
+ Copyright (C) [2024] [ICR]
11
+
12
+ This program is free software: you can redistribute it and/or modify
13
+ it under the terms of the GNU General Public License as published by
14
+ the Free Software Foundation, either version 3 of the License, or
15
+ (at your option) any later version.
16
+
17
+ This program is distributed in the hope that it will be useful,
18
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ GNU General Public License for more details.
21
+
22
+ You should have received a copy of the GNU General Public License
23
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
24
+ License-File: LICENSE
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: apscheduler>=3.10.3
27
+ Requires-Dist: nested-lookup>=0.2.25
28
+ Requires-Dist: numpy
29
+ Requires-Dist: opensearch-dsl
30
+ Requires-Dist: opensearch-logger
31
+ Requires-Dist: opensearch-py
32
+ Requires-Dist: pandas
33
+ Requires-Dist: pyopenssl
34
+ Requires-Dist: python-json-logger==2.0.7
35
+ Requires-Dist: pyyaml
36
+ Provides-Extra: build
37
+ Requires-Dist: build==1.2.2.post1; extra == 'build'
38
+ Requires-Dist: wheel==0.45.1; extra == 'build'
39
+ Provides-Extra: deploy
40
+ Requires-Dist: twine==6.1.0; extra == 'deploy'
41
+ Provides-Extra: dev
42
+ Requires-Dist: commitizen==4.8.3; extra == 'dev'
43
+ Requires-Dist: flake8-pytest-style==2.1.0; extra == 'dev'
44
+ Requires-Dist: flake8==7.3.0; extra == 'dev'
45
+ Requires-Dist: mockito==1.5.4; extra == 'dev'
46
+ Requires-Dist: pre-commit; extra == 'dev'
47
+ Requires-Dist: pytest-cov==6.2.1; extra == 'dev'
48
+ Requires-Dist: pytest==8.4.1; extra == 'dev'
49
+ Requires-Dist: ruff==0.12.3; extra == 'dev'
50
+ Provides-Extra: test
51
+ Requires-Dist: mockito==1.5.4; extra == 'test'
52
+ Requires-Dist: pytest-cov==6.2.1; extra == 'test'
53
+ Requires-Dist: pytest==8.4.1; extra == 'test'
54
+ Description-Content-Type: text/markdown
55
+
56
+ - [Development](#development)
57
+ - [OpenSearch Client Helper](#opensearch-client-helper)
58
+ - [🔒 Singleton Pattern Implementation](#-singleton-pattern-implementation)
59
+ - [🛠 `ClientHelper` Class](#-clienthelper-class)
60
+ - [1. Environment Variable Configuration](#1-environment-variable-configuration)
61
+ - [2. Client Initialization `get_client`](#2-client-initialization-get_client)
62
+ - [3. Property Access](#3-property-access)
63
+ - [Usage Example](#usage-example)
64
+ - [Data Normalizer](#data-normalizer)
65
+ - [๐Ÿ— Core Architecture](#-core-architecture)
66
+ - [🔑 Key Features](#-key-features)
67
+ - [1. Strict Parameter Validation](#1-strict-parameter-validation)
68
+ - [2. Data Cleaning & Standardization](#2-data-cleaning--standardization)
69
+ - [3. Dynamic Index Partitioning](#3-dynamic-index-partitioning)
70
+ - [4. Deterministic Unique IDs](#4-deterministic-unique-ids)
71
+ - [5. Bulk API Formatting](#5-bulk-api-formatting)
72
+ - [🚀 Usage Workflow](#-usage-workflow)
73
+ - [โš™๏ธ Configuration Options](#๏ธ-configuration-options)
74
+ - [Template Generator](#template-generator)
75
+ - [๐Ÿ— Core Workflow](#-core-workflow)
76
+ - [🔑 Key Features](#-key-features-1)
77
+ - [1. Dynamic Mapping Inference](#1-dynamic-mapping-inference)
78
+ - [2. Automatic Cleanup](#2-automatic-cleanup)
79
+ - [3. Standardized Settings](#3-standardized-settings)
80
+ - [4. Safety Checks](#4-safety-checks)
81
+ - [🚀 Usage Example](#-usage-example)
82
+ - [โš™๏ธ Method Overview](#๏ธ-method-overview)
83
+ - [Fluent DSL Client](#fluent-dsl-client)
84
+ - [๐Ÿ— Core Architecture](#-core-architecture-1)
85
+ - [🔑 Composed Mixins](#-composed-mixins)
86
+ - [🚀 Usage Example](#-usage-example-1)
87
+ - [🧪 Testing the Fluent Chain & Preferred Usage](#-testing-the-fluent-chain--preferred-usage)
88
+
89
+
90
+ ## Development
91
+
92
+ To be developed/deployed against the latest stable release of Python 3.10 or later
93
+
94
+
95
+ 1. Clone this repo, you should be in the /your/cloned/directory/almagest directory. If not, cd to that directory.
96
+ 2. Source the dev setup script to setup your environment:
97
+ 1. This script will create a virtual environment, install the required dependencies, activate the new environment and setup pre-commit hooks. If you are using VSCODE, it should recognize the virtual environment install and prompt to configure your python interpreter to use the newly created virtual environment.
98
+ 1. There are 4 optional arguments:
99
+ * `-v` flag specifies which version of python to use (example: `-v 3.11`)
100
+ * `-d` installs build, dev, and test dependencies in editable mode
101
+ * `-b` installs build dependencies only
102
+ * `-a` installs all dependencies in editable mode
103
+ 1. To use the default python version and setup an editable dev environment with all dependencies installed run:
104
+
105
+ ```bash
106
+ source scripts/setup_python_environment.sh -a
107
+ ```
108
+
109
+ ## OpenSearch Client Helper
110
+
111
+ This module provides a robust, singleton-based client manager for connecting to an OpenSearch cluster. It ensures that only one instance of the client configuration exists throughout the application lifecycle, preventing redundant connections and centralizing environment variable management.
112
+
113
+ ### 🔒 Singleton Pattern Implementation
114
+
115
+ The code utilizes a custom `Singleton` metaclass to enforce the **Singleton Design Pattern**:
116
+
117
+ * **`Singleton` Metaclass**: Overrides `__init__` and `__call__` to ensure that `ClientHelper` can only be instantiated once. Subsequent calls to `ClientHelper()` return the original instance.
118
+ * **Benefit**: This guarantees that environment variables are read only once and that the application maintains a single source of truth for connection credentials and host configuration.
119
+
120
+ ### 🛠 `ClientHelper` Class
121
+
122
+ The `ClientHelper` class encapsulates the logic for initializing and retrieving the OpenSearch client.
123
+
124
+ #### 1. Environment Variable Configuration
125
+ Upon initialization, the class strictly requires the following environment variables to be set:
126
+ * `OPENSEARCH_HOST`
127
+ * `OPENSEARCH_USER`
128
+ * `OPENSEARCH_PW`
129
+
130
+ If any of these are missing, a `ValueError` is raised immediately, failing fast to prevent runtime connection errors later in the execution flow.
131
+
132
+ #### 2. Client Initialization (`get_client`)
133
+ The `get_client` class method is the primary entry point for obtaining a connected client:
134
+ * **Authentication**: Retrieves stored credentials and constructs an HTTP auth tuple.
135
+ * **Connection Settings**: Configures the `OpenSearch` client with:
136
+ * **SSL/TLS**: Enabled (`use_ssl=True`, `scheme="https"`) on port `443`.
137
+ * **Certificate Verification**: Disabled (`verify_certs=False`) with warnings enabled (`ssl_show_warn=True`). *Note: Disabling verification is common in development but should be reviewed for production environments.*
138
+ * **Health Check**: Executes a `client.ping()` to verify connectivity. If the ping fails, a `ValueError` is raised.
139
+
140
+ #### 3. Property Access
141
+ * **`host`**: A read-only property that exposes the configured OpenSearch host string, allowing other parts of the application to inspect the target endpoint without exposing internal state directly.
142
+
143
+ ### Usage Example
144
+
145
+ ```python
146
+ # The first call initializes the singleton and reads env vars
147
+ client = ClientHelper.get_client()
148
+
149
+ # Subsequent calls return the same configured instance
150
+ same_client = ClientHelper.get_client()
151
+
152
+ assert client is same_client # True
153
+ ```
154
+
155
+ ## Data Normalizer
156
+
157
+ This module provides the `DataNormalizer` class, a robust utility designed to standardize, validate, and prepare raw data dictionaries for ingestion into an OpenSearch cluster. It leverages `pandas` for efficient data manipulation and ensures strict schema compliance before bulk indexing.
158
+
159
+ ### ๐Ÿ— Core Architecture
160
+
161
+ The class operates on a list of dictionaries (records) and performs the following high-level tasks:
162
+ 1. **Validation**: Ensures mandatory fields (classification, dates, provider info) exist and are valid.
163
+ 2. **Normalization**: Cleans data types, handles `NaN`/`None` values, and standardizes date formats.
164
+ 3. **Partitioning**: Dynamically generates OpenSearch index names based on time-based strategies (yearly, monthly, weekly, daily).
165
+ 4. **Bulkification**: Formats data into the specific JSON structure required by the OpenSearch Bulk API, including generating deterministic unique IDs.
166
+
167
+ ### 🔑 Key Features
168
+
169
+ #### 1. Strict Parameter Validation
170
+ The class uses a custom decorator `@standard_params` to enforce the presence of critical configuration arguments before executing methods like `standardize` or `bulkify`. Missing parameters trigger immediate `ValueError` exceptions.
171
+ * **Required Params**: `classification_field`, `start_date_field`, `provider_details`, `unique_fields`.
172
+
173
+ #### 2. Data Cleaning & Standardization
174
+ The `standardize` method performs several cleanup operations:
175
+ * **Null Handling**: Replaces various `NaN` representations (`np.nan`, `None`, `"nan"`, `"NAN"`) and empty-like strings (only spaces, dashes, or plus signs) with empty strings `""`.
176
+ * **Classification Mapping**: Renames a user-specified classification column to the standard `classification` field.
177
+ * **Date Parsing**: Converts a specified date field into `startTimestamp` and enriches the dataset with derived columns for visualization:
178
+ * `monthOfYear_zulu`
179
+ * `dayOfWeek_zulu`
180
+ * `hourOfDay_zulu`
181
+ * **Provider Injection**: Validates or injects `dataProvider` and `dataProviderUrl` fields, ensuring no records have missing provider information.
182
+ * **Ingest Timestamp**: Automatically adds an `ingestTimestamp` column with the current execution time.
183
+
184
+ #### 3. Dynamic Index Partitioning
185
+ The `_transform_index_suffix` method intelligently routes records to specific indices based on the `index_partition_date_format` configuration:
186
+ * **Supported Formats**:
187
+ * `none` / `timeless`: All data goes to a single `_all_time` index.
188
+ * `yearly`: `{alias}_v{version}_{YYYY}`
189
+ * `monthly`: `{alias}_v{version}_{YYYYMM}`
190
+ * `weekly`: `{alias}_v{version}_{YYYYww}`
191
+ * `daily`: `{alias}_v{version}_{YYYYMMDD}`
192
+ * **Logic**: It parses the date field, extracts relevant time components, constructs the index name per record, and groups the data into a dictionary where keys are index names and values are lists of records.
193
+
194
+ #### 4. Deterministic Unique IDs
195
+ To prevent duplicate documents in OpenSearch, the `_create_unique_id` method generates a SHA-256 hash:
196
+ * It sorts the values of specified `unique_fields` (or all fields if none are specified).
197
+ * It concatenates these values into a string.
198
+ * It returns the uppercase hexadecimal hash, which serves as the `_id` in the bulk request.
199
+
200
+ #### 5. Bulk API Formatting
201
+ The `_bulkify` method transforms the cleaned DataFrame rows into the specific action/metadata format required by `opensearchpy.helpers.bulk`:
202
+ ```python
203
+ {
204
+ "_index": "my-index-v1-202310",
205
+ "_id": "A1B2C3D4...", # SHA-256 Hash
206
+ "_source": { ... } # The actual record data
207
+ }
208
+ ```
209
+
210
+ ### 🚀 Usage Workflow
211
+
212
+ The primary entry point is the `standardize_and_bulkify` method, which chains the entire process:
213
+
214
+ ```python
215
+ from data_normalizer import DataNormalizer
216
+
217
+ raw_data = [
218
+ {"id": 1, "date": "2023-10-27", "type": "A", "provider": "SourceX"},
219
+ {"id": 2, "date": "2023-10-28", "type": "B", "provider": "SourceX"}
220
+ ]
221
+
222
+ normalizer = DataNormalizer(
223
+ data=raw_data,
224
+ alias="logs",
225
+ index_partition_date_format="monthly"
226
+ )
227
+
228
+ # Execute the full pipeline
229
+ bulk_records = normalizer.standardize_and_bulkify(
230
+ classification_field="type",
231
+ start_date_field="date",
232
+ provider_details={"dataProvider": "SourceX", "dataProviderUrl": "https://source.x"},
233
+ unique_fields=["id"]
234
+ )
235
+
236
+ # bulk_records is now ready for client.bulk()
237
+ ```
238
+
239
+ ### โš™๏ธ Configuration Options
240
+
241
+ | Parameter | Description |
242
+ | :--- | :--- |
243
+ | `data` | List of input dictionaries. |
244
+ | `alias` | Base name for the OpenSearch index alias. |
245
+ | `index_partition_date_format` | Strategy for splitting indices: `none`, `timeless`, `yearly`, `monthly`, `weekly`, `daily`. |
246
+ | `index_version` | Integer version number appended to index names (useful for reindexing). |
247
+
248
+
249
+ ## Template Generator
250
+
251
+ This module provides the `TemplateGenerator` class, a utility designed to automate the creation of **OpenSearch Index Templates** based on sample data. By analyzing a representative dataset, it leverages OpenSearch's dynamic mapping inference to generate robust, reusable templates that ensure schema consistency for future data ingestion.
252
+
253
+ ### ๐Ÿ— Core Workflow
254
+
255
+ The class orchestrates the following lifecycle:
256
+ 1. **Validation**: Verifies input arguments (`alias`, date format, standard parameters).
257
+ 2. **Normalization**: Processes sample data using `DataNormalizer` to ensure schema compliance.
258
+ 3. **Mapping Inference**: Bulk indexes the normalized data into a **temporary index**, allowing OpenSearch to automatically infer field types and mappings.
259
+ 4. **Template Construction**: Extracts the generated mappings and wraps them in a formal template body with standardized settings (shards, replicas, aliases).
260
+ 5. **Cleanup & Registration**: Deletes the temporary index and registers the new template under the specified alias.
261
+
262
+ ### 🔑 Key Features
263
+
264
+ #### 1. Dynamic Mapping Inference
265
+ Instead of manually defining complex mapping JSON, this class uses real data to let OpenSearch determine the optimal field types. This reduces human error and adapts to evolving data structures.
266
+
267
+ #### 2. Automatic Cleanup
268
+ The process is non-destructive to the cluster's storage. A temporary index is created solely for the analysis phase and is **automatically deleted** once the mappings are extracted and the template is saved.
269
+
270
+ #### 3. Standardized Settings
271
+ The generated template enforces consistent cluster configurations:
272
+ * **Shards**: Fixed to `1`.
273
+ * **Replicas**: Fixed to `2`.
274
+ * **Aliases**: Automatically associates the template with the provided alias name.
275
+ * **Patterns**: Applies to all indices matching `{alias}_*`.
276
+
277
+ #### 4. Safety Checks
278
+ * **Existence Check**: Before generating, it checks if a template with the same name already exists to prevent accidental overwrites.
279
+ * **Argument Validation**: Strictly validates `index_details` and `standard_params` before execution to fail fast on configuration errors.
280
+ * **Index Verification**: Ensures all target indices derived from the normalizer exist (creating them if necessary) before attempting bulk operations.
281
+
282
+ ### 🚀 Usage Example
283
+
284
+ ```python
285
+ from almagest.template_generator import TemplateGenerator
286
+
287
+ # Sample data representing the final schema
288
+ sample_data = [
289
+ {"id": 1, "timestamp": "2023-10-27T10:00:00Z", "status": "active", "count": 42},
290
+ {"id": 2, "timestamp": "2023-10-28T11:30:00Z", "status": "inactive", "count": 15}
291
+ ]
292
+
293
+ generator = TemplateGenerator()
294
+
295
+ try:
296
+ generator.generate_template(
297
+ data=sample_data,
298
+ index_details={
299
+ "alias": "app-logs",
300
+ "index_partition_date_format": "monthly",
301
+ "index_version": 1
302
+ },
303
+ standard_params={
304
+ "classification_field": "status",
305
+ "start_date_field": "timestamp",
306
+ "provider_details": {"dataProvider": "MyApp", "dataProviderUrl": "https://myapp.com"},
307
+ "unique_fields": ["id"]
308
+ }
309
+ )
310
+ # Output: Template 'app-logs' successfully created.
311
+ except ValueError as e:
312
+ print(f"Generation failed: {e}")
313
+ ```
314
+
315
+ ## Fluent DSL Client
316
+
317
+ The `FluentDslClient` is the primary interface for constructing and executing complex OpenSearch queries. It composes multiple functional mixins to provide a unified, chainable API that handles matching, aggregation, date filtering, and pagination seamlessly.
318
+
319
+ ### ๐Ÿ— Core Architecture
320
+
321
+ Instead of creating monolithic wrapper classes, the client inherits from a hierarchy of specialized mixins. This allows for modular functionality while maintaining a single, coherent object state. The class combines `MatchMixin`, `AggMixin`, `DateMixin`, `PagerMixin`, and `BaseMixin` to offer a comprehensive toolkit for query building.
322
+
323
+ ### 🔑 Composed Mixins
324
+
325
+ - **`MatchMixin`**: Handles boolean logic (`must`, `filter`, `must_not`), term matches, and existence checks.
326
+ - **`DateMixin`**: Provides helpers for range queries (`between`, `after`, `before`) with automatic ISO formatting.
327
+ - **`AggMixin`**: Manages complex aggregations, specifically Point-in-Time (PIT) based composite aggregations for deep pagination and "latest/earliest" document retrieval.
328
+ - **`PagerMixin`**: Orchestrates the `search_after` loop to fetch large datasets efficiently without deep pagination penalties.
329
+ - **`BaseMixin`**: Initializes the underlying `opensearchpy.Search` object, manages the client connection, and handles shared state.
330
+
331
+ ### 🚀 Usage Example
332
+
333
+ ```python
334
+ from almagest.client import FluentDslClient
335
+ import datetime as dt
336
+
337
+ # Initialize the client targeting a specific index
338
+ client = FluentDslClient(index="app-logs-v1")
339
+
340
+ start = dt.datetime(2026, 3, 1)
341
+ end = dt.datetime(2026, 3, 6)
342
+
343
+ # Chain methods to build a complex query
344
+ results = (
345
+ client
346
+ .between("timestamp", start, end) # DateMixin: Range filter
347
+ .exactly("status", "error") # MatchMixin: Term filter
348
+ .search_after(timeout=30) # PagerMixin: Execute with pagination
349
+ )
350
+
351
+ # 'results' contains the flattened list of hits from all pages
352
+ for hit in results:
353
+ print(f"Error at {hit['timestamp']}: {hit['msg']}")
354
+ ```
355
+
356
+ ### 🧪 Testing the Fluent Chain & Preferred Usage
357
+
358
+ The `FluentDslClient` is rigorously tested to ensure state is correctly passed between mixins and that the final DSL generation matches expectations.
359
+
360
+ **Preferred Usage Pattern:**
361
+ The standard workflow is to chain all configuration methods (filtering, dating, aggregating) and terminate the chain with the **`search_after()`** method. This method acts as the execution trigger, handling the internal pagination loop automatically.
362
+
363
+ * **Why `search_after()`?** Unlike standard `.execute()` calls, `search_after()` manages the cursor-based pagination loop internally. It fetches the first page, extracts the sort cursor, and continues fetching subsequent pages until the dataset is exhausted (or a limit is reached), returning a flattened list of results.
364
+ * **Reference:** See `test_fluent_chain_with_pagination_loop` for the canonical implementation pattern.
365
+
366
+ **Key Test Scenarios:**
367
+ 1. **Date & Match Integration**: Verifies that `between()` correctly formats ISO dates and combines them with `exactly()` term queries before execution.
368
+ 2. **Pagination Loop**: Ensures `search_after()` correctly extracts cursors from responses and iterates until no more results are found.
369
+ 3. **Aggregation Setup**: Confirms that `latest()` configures the composite aggregation sources with correct keyword suffixes (e.g., `user_id.keyword`) prior to the `search_after()` call.
370
+ 4. **Complex Chains**: Validates that combining Date, Match, and Agg mixins results in a valid DSL body containing `query`, `aggs`, and `pit` (Point-in-Time) blocks when executed.
371
+
372
+ *Example Test Assertion Logic:*
373
+ ```python
374
+ # 1. Chain configuration methods
375
+ # (DateMixin)
376
+ assert len(dsl_client._range_calls) == 2
377
+ fields = [call[0] for call in dsl_client._range_calls]
378
+ assert fields == ["timestamp", "timestamp"]
379
+
380
+ # (MatchMixin)
381
+ assert len(dsl_client._must) == 1
382
+ term_query = dsl_client._must[0]
383
+ expected = {"term": {"status": "error"}}
384
+
385
+ # 2. Execute via search_after() (The Preferred Trigger)
386
+ # This triggers the internal loop defined in PagerMixin
387
+ results = dsl_client.search_after(timeout=30)
388
+
389
+ # 3. Verify Execution State
390
+ # The PagerMixin should have updated internal args with the cursor from the last hit
391
+ assert dsl_client._search._extra_args.get("search_after") == ["cursor_abc"]
392
+
393
+ # 4. Verify Final DSL Structure (if inspecting raw DSL)
394
+ dsl_client.pit_id = "pit-xyz"
395
+ final_body = dsl_client.to_dict()
396
+ assert "aggs" in final_body
397
+ assert final_body.get("size") == 0 # Size 0 is typical for pure aggregation queries
398
+