datafun 0.5.2__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datafun-0.5.2 → datafun-0.6.0}/PKG-INFO +2 -2
- {datafun-0.5.2 → datafun-0.6.0}/README.md +1 -1
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/elk.py +11 -3
- {datafun-0.5.2 → datafun-0.6.0}/datafun.egg-info/PKG-INFO +2 -2
- {datafun-0.5.2 → datafun-0.6.0}/pyproject.toml +1 -1
- {datafun-0.5.2 → datafun-0.6.0}/LICENSE +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/__init__.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/cache.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/dataset.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/__init__.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/gcs.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/iterable.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/local_file.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/sources/rest.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun/utils.py +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun.egg-info/SOURCES.txt +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun.egg-info/dependency_links.txt +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun.egg-info/requires.txt +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/datafun.egg-info/top_level.txt +0 -0
- {datafun-0.5.2 → datafun-0.6.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafun
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: datafun brings the fun back to data pipelines
|
|
5
5
|
Author-email: "Diego Giorgini, Luigi Di Sotto, Saeed Choobani" <diego.giorgini@aitechnologies.it>
|
|
6
6
|
Requires-Python: >=3.8
|
|
@@ -304,7 +304,7 @@ You can see examples for every operation in the [dedicated notebook](./examples/
|
|
|
304
304
|
| **start_isodate** | str (ISO datetime) | Yes | | Elastic start date range with format: "2021-09-15T10:00:00.000Z" |
|
|
305
305
|
| **end_isodate** | str (ISO datetime) | Yes | | Elastic end date range with format: "2021-09-15T10:00:00.000Z" |
|
|
306
306
|
| **date_field** | str | No | @timestamp | Elastic date field. Can be nested into list, eg. "messages.date" |
|
|
307
|
-
| **date_field_separator** | str | No | . | Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
307
|
+
| **date_field_separator** | str | No | . | [DEPRECATED] (separator automatically inferred) Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
308
308
|
|
|
309
309
|
**Returned element type**: ```dict```. Each element is a document matching the given query.
|
|
310
310
|
|
|
@@ -289,7 +289,7 @@ You can see examples for every operation in the [dedicated notebook](./examples/
|
|
|
289
289
|
| **start_isodate** | str (ISO datetime) | Yes | | Elastic start date range with format: "2021-09-15T10:00:00.000Z" |
|
|
290
290
|
| **end_isodate** | str (ISO datetime) | Yes | | Elastic end date range with format: "2021-09-15T10:00:00.000Z" |
|
|
291
291
|
| **date_field** | str | No | @timestamp | Elastic date field. Can be nested into list, eg. "messages.date" |
|
|
292
|
-
| **date_field_separator** | str | No | . | Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
292
|
+
| **date_field_separator** | str | No | . | [DEPRECATED] (separator automatically inferred) Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
293
293
|
|
|
294
294
|
**Returned element type**: ```dict```. Each element is a document matching the given query.
|
|
295
295
|
|
|
@@ -31,6 +31,8 @@ class ELKDatasetConfig:
|
|
|
31
31
|
|
|
32
32
|
class ELKDataset(DatasetSource):
|
|
33
33
|
def __init__(self, config: ELKDatasetConfig, **kwargs):
|
|
34
|
+
if 'date_field_separator' in kwargs:
|
|
35
|
+
print("WARN: date_field_separator is deprecated, the separator is now automatically inferred")
|
|
34
36
|
super().__init__(config=config, **kwargs)
|
|
35
37
|
|
|
36
38
|
self.es = Elasticsearch(
|
|
@@ -94,11 +96,17 @@ class ELKDataset(DatasetSource):
|
|
|
94
96
|
if not isinstance(xs, List):
|
|
95
97
|
raise TypeError(f'Field query.bool.filter must be of type List, but found of type {type(xs)}')
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
path_sep_alts = ['/', '//--@@--//']
|
|
100
|
+
path_sep = '.'
|
|
101
|
+
while path_sep in self.config.date_field:
|
|
102
|
+
try:
|
|
103
|
+
path_sep = path_sep_alts.pop(0)
|
|
104
|
+
except Exception as e:
|
|
105
|
+
raise ValueError(f'Field {self.config.date_field} contains invalid characters. Exception: {e}')
|
|
98
106
|
for idx, obj in enumerate(xs):
|
|
99
107
|
if dl.has(obj, "range"):
|
|
100
|
-
obj = dl.update(obj, f"range{
|
|
101
|
-
obj = dl.update(obj, f"range{
|
|
108
|
+
obj = dl.update(obj, f"range{path_sep}{self.config.date_field}{path_sep}gte", value=self.config.start_isodate, sep=path_sep)
|
|
109
|
+
obj = dl.update(obj, f"range{path_sep}{self.config.date_field}{path_sep}lte", value=self.config.end_isodate, sep=path_sep)
|
|
102
110
|
if not obj:
|
|
103
111
|
raise ValueError(f'{self.config.date_field}.lte or {self.config.date_field}.lte fields can\'t be updated, e.g. check '
|
|
104
112
|
'if they exist in the query.')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafun
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: datafun brings the fun back to data pipelines
|
|
5
5
|
Author-email: "Diego Giorgini, Luigi Di Sotto, Saeed Choobani" <diego.giorgini@aitechnologies.it>
|
|
6
6
|
Requires-Python: >=3.8
|
|
@@ -304,7 +304,7 @@ You can see examples for every operation in the [dedicated notebook](./examples/
|
|
|
304
304
|
| **start_isodate** | str (ISO datetime) | Yes | | Elastic start date range with format: "2021-09-15T10:00:00.000Z" |
|
|
305
305
|
| **end_isodate** | str (ISO datetime) | Yes | | Elastic end date range with format: "2021-09-15T10:00:00.000Z" |
|
|
306
306
|
| **date_field** | str | No | @timestamp | Elastic date field. Can be nested into list, eg. "messages.date" |
|
|
307
|
-
| **date_field_separator** | str | No | . | Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
307
|
+
| **date_field_separator** | str | No | . | [DEPRECATED] (separator automatically inferred) Separator for date_field used to split the path. Use different ones to NOT split and consider date_field as single field |
|
|
308
308
|
|
|
309
309
|
**Returned element type**: ```dict```. Each element is a document matching the given query.
|
|
310
310
|
|
|
@@ -8,7 +8,7 @@ authors = [
|
|
|
8
8
|
{ name = "Diego Giorgini, Luigi Di Sotto, Saeed Choobani", email = "diego.giorgini@aitechnologies.it" }
|
|
9
9
|
]
|
|
10
10
|
description = "datafun brings the fun back to data pipelines"
|
|
11
|
-
version = "0.5.2"
|
|
11
|
+
version = "0.6.0"
|
|
12
12
|
requires-python = ">=3.8"
|
|
13
13
|
dependencies = [
|
|
14
14
|
"backoff",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|