ftmq 4.3.2__tar.gz → 4.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ftmq-4.3.2 → ftmq-4.5.3}/PKG-INFO +10 -11
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/__init__.py +1 -1
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/cli.py +1 -1
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/io.py +3 -4
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/dataset.py +13 -5
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/mixins.py +1 -1
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/stats.py +2 -1
- ftmq-4.5.3/ftmq/py.typed +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/base.py +13 -6
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/lake.py +110 -49
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/sql.py +7 -5
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/util.py +19 -24
- {ftmq-4.3.2 → ftmq-4.5.3}/pyproject.toml +14 -14
- ftmq-4.3.2/ftmq/logging.py +0 -105
- {ftmq-4.3.2 → ftmq-4.5.3}/LICENSE +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/NOTICE +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/README.md +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/aggregate.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/aggregations.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/enums.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/filters.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/__init__.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/entity.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/query.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/similar.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/sql.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/__init__.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/aleph.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/__init__.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/dataset.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/loader.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/settings.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/store.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/fragments/utils.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/level.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/memory.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/redis.py +0 -0
- {ftmq-4.3.2 → ftmq-4.5.3}/ftmq/types.py +0 -0
{ftmq-4.3.2 → ftmq-4.5.3}/PKG-INFO RENAMED

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: ftmq
-Version: 4.3.2
+Version: 4.5.3
 Summary: followthemoney query dsl and io helpers
 License: AGPLv3+
 License-File: LICENSE
 License-File: NOTICE
 Author: Simon Wörpel
 Author-email: simon.woerpel@pm.me
-Requires-Python: >=3.11,<
+Requires-Python: >=3.11,<3.14
 Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
@@ -21,25 +21,24 @@ Provides-Extra: postgres
 Provides-Extra: redis
 Provides-Extra: sql
 Requires-Dist: alephclient (>=2.6.0,<3.0.0) ; extra == "aleph"
-Requires-Dist: anystore (>=0.
+Requires-Dist: anystore (>=1.0.1,<2.0.0)
 Requires-Dist: click (>=8.2.1,<9.0.0)
 Requires-Dist: click-default-group (>=1.2.4,<2.0.0)
-Requires-Dist: deltalake (>=1.
-Requires-Dist: duckdb (>=1.4.
+Requires-Dist: deltalake (>=1.4.1,<2.0.0) ; extra == "lake"
+Requires-Dist: duckdb (>=1.4.4,<2.0.0) ; extra == "lake"
 Requires-Dist: fakeredis (>=2.26.2,<3.0.0) ; extra == "redis"
-Requires-Dist: followthemoney (>=4.
+Requires-Dist: followthemoney (>=4.5.2,<5.0.0)
 Requires-Dist: furl (>=2.1.4,<3.0.0) ; extra == "aleph"
-Requires-Dist: nomenklatura (>=4.1
+Requires-Dist: nomenklatura (>=4.6.1,<5.0.0)
 Requires-Dist: orjson (>=3.10.18,<4.0.0)
-Requires-Dist: pandas (>=
+Requires-Dist: pandas (>=3.0.0,<4.0.0) ; extra == "lake"
 Requires-Dist: plyvel (>=1.5.1,<2.0.0) ; extra == "level"
 Requires-Dist: psycopg[pool] (>=3.2.9,<4.0.0) ; extra == "postgres"
-Requires-Dist: pyarrow (>=
-Requires-Dist: pycountry (>=24.6.1,<25.0.0)
+Requires-Dist: pyarrow (>=23.0.0,<24.0.0) ; extra == "lake"
 Requires-Dist: pydantic (>=2.11.3,<3.0.0)
 Requires-Dist: pyicu (>=2.15.2,<3.0.0)
 Requires-Dist: redis (>=5.2.1,<6.0.0) ; extra == "redis"
-Requires-Dist: rigour (>=1.
+Requires-Dist: rigour (>=1.6.2,<2.0.0)
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "postgres"
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "sql"
 Project-URL: Documentation, https://docs.investigraph.dev/lib/ftmq
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/cli.py RENAMED

@@ -2,13 +2,13 @@ from datetime import datetime

 import click
 from anystore.io import smart_write, smart_write_json, smart_write_model
+from anystore.logging import configure_logging, get_logger
 from click_default_group import DefaultGroup
 from followthemoney import ValueEntity
 from nomenklatura import settings

 from ftmq.aggregate import aggregate
 from ftmq.io import smart_read_proxies, smart_write_proxies
-from ftmq.logging import configure_logging, get_logger
 from ftmq.model.dataset import Catalog, Dataset
 from ftmq.model.stats import Collector
 from ftmq.query import Query
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/io.py RENAMED

@@ -1,11 +1,12 @@
 from typing import Any, Iterable, Type

 import orjson
-from anystore.io import
+from anystore.io import smart_open, smart_stream
+from anystore.logging import get_logger
+from anystore.types import Uri
 from banal import is_listish
 from followthemoney import E, StatementEntity, ValueEntity

-from ftmq.logging import get_logger
 from ftmq.query import Query
 from ftmq.store import Store, get_store
 from ftmq.types import Entities, Entity

@@ -13,8 +14,6 @@ from ftmq.util import ensure_entity, make_entity

 log = get_logger(__name__)

-DEFAULT_MODE = "rb"
-

 def smart_get_store(uri: Uri, **kwargs) -> Store | None:
     try:
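Across cli.py, io.py, and base.py the logging helpers now come from anystore instead of the removed ftmq.logging module (see the deleted file at the end of this diff). A minimal sketch of the new import path, assuming the anystore helpers keep the same call pattern as the removed module:

    from anystore.logging import configure_logging, get_logger

    configure_logging()
    log = get_logger(__name__)
    log.info("structured logging now configured via anystore")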
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/dataset.py RENAMED

@@ -1,19 +1,22 @@
 from datetime import datetime
-from typing import Literal
+from typing import Literal, TypeVar

 from anystore.io import logged_items
-from anystore.types import SDict
+from anystore.types import HttpUrlStr, SDict
 from followthemoney.dataset import DataPublisher
 from followthemoney.dataset.dataset import DatasetModel as _DatasetModel
-from pydantic import AnyUrl
+from pydantic import AnyUrl
 from rigour.mime.types import FTM

 from ftmq.model.mixins import BaseModel
 from ftmq.model.stats import DatasetStats
 from ftmq.types import Entities
+from ftmq.util import DEFAULT_DATASET

 ContentType = Literal["documents", "structured", "mixed"]

+D = TypeVar("D", bound="Dataset")
+

 class Dataset(BaseModel, _DatasetModel):
     prefix: str | None = None

@@ -57,9 +60,9 @@ class Catalog(BaseModel):
     description: str | None = None
     maintainer: DataPublisher | None = None
     publisher: DataPublisher | None = None
-    url:
+    url: HttpUrlStr | None = None
     uri: str | None = None
-    logo_url:
+    logo_url: HttpUrlStr | None = None
     git_repo: AnyUrl | None = None

     def iterate(self) -> Entities:

@@ -70,3 +73,8 @@ class Catalog(BaseModel):
     def names(self) -> set[str]:
         """Get the names of all datasets in the catalog."""
         return {d.name for d in self.datasets}
+
+
+def make_dataset(name: str = DEFAULT_DATASET, cls: type[D] = Dataset, **kwargs) -> D:
+    kwargs["title"] = kwargs.pop("title", name)
+    return cls(name=name, **kwargs)
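The new make_dataset helper builds a Dataset model (or a subclass passed via cls) and falls back to the dataset name as its title. A minimal usage sketch, assuming the function behaves exactly as shown in the hunk above:

    from ftmq.model.dataset import Dataset, make_dataset

    # the title defaults to the name when not given explicitly
    ds = make_dataset("my_dataset")
    assert isinstance(ds, Dataset)
    assert ds.title == "my_dataset"

    # an explicit title is passed through unchanged
    ds = make_dataset("my_dataset", title="My Dataset")
    assert ds.title == "My Dataset"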
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/model/stats.py RENAMED

@@ -2,8 +2,9 @@ from collections import Counter
 from datetime import datetime
 from typing import Any

+from anystore.model import BaseModel
 from followthemoney import model
-from pydantic import
+from pydantic import model_validator

 from ftmq.types import Entities, Entity
 from ftmq.util import get_country_name, get_year_from_iso
ftmq-4.5.3/ftmq/py.typed ADDED
File without changes
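The empty py.typed marker (PEP 561) signals to type checkers that ftmq ships inline type annotations which downstream projects may rely on.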
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/base.py RENAMED

@@ -1,25 +1,26 @@
-from typing import Generator, Iterable
+from typing import Generator, Generic, Iterable, TypeVar
 from urllib.parse import urlparse

 from anystore.functools import weakref_cache as cache
-from
+from anystore.logging import get_logger
 from followthemoney.dataset.dataset import Dataset
 from nomenklatura import store as nk
 from nomenklatura.db import get_engine
 from nomenklatura.resolver import Resolver

 from ftmq.aggregations import AggregatorResult
-from ftmq.logging import get_logger
 from ftmq.model.stats import Collector, DatasetStats
 from ftmq.query import Query
 from ftmq.similar import get_similar
 from ftmq.types import StatementEntities, StatementEntity
-from ftmq.util import ensure_dataset
+from ftmq.util import DEFAULT_DATASET, ensure_dataset

 log = get_logger(__name__)

 DEFAULT_ORIGIN = "default"

+V = TypeVar("V", bound="View")
+

 @cache
 def get_resolver(uri: str | None = None) -> Resolver[StatementEntity]:

@@ -28,7 +29,7 @@ def get_resolver(uri: str | None = None) -> Resolver[StatementEntity]:
     return Resolver.make_default(get_engine("sqlite:///:memory:"))


-class Store(nk.Store):
+class Store(nk.Store[Dataset, StatementEntity], Generic[V]):
     """
     Feature add-ons to `nomenklatura.store.Store`
     """

@@ -51,7 +52,7 @@ class Store(nk.Store):
         linker = linker or get_resolver(kwargs.get("uri"))
         super().__init__(dataset=dataset, linker=linker, **kwargs)
         # implicit set all datasets as default store scope:
-        if dataset ==
+        if dataset.name == DEFAULT_DATASET and not dataset.leaf_names:
             self.dataset = self.get_scope()

     def get_scope(self) -> Dataset:

@@ -60,6 +61,12 @@ class Store(nk.Store):
         """
         raise NotImplementedError

+    def view(self, scope: Dataset | None = None, external: bool = False) -> V:
+        raise NotImplementedError
+
+    def default_view(self, external: bool = False) -> V:
+        return self.view(self.dataset, external)
+
     def iterate(self, dataset: str | Dataset | None = None) -> StatementEntities:
         """
         Iterate all the entities, optional filter for a dataset.
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/lake.py RENAMED

@@ -19,16 +19,15 @@ Layout:
 """

 from pathlib import Path
-from typing import Any, Generator
+from typing import Any, Generator
 from urllib.parse import urlparse

 import duckdb
-import
-import pandas as pd
+import pyarrow as pa
 from anystore.functools import weakref_cache as cache
-from anystore.lock import Lock
+from anystore.interface.lock import Lock
 from anystore.logging import get_logger
-from anystore.store
+from anystore.store import Store as FSStore
 from anystore.types import SDict
 from anystore.util import clean_dict
 from deltalake import (

@@ -39,6 +38,7 @@ from deltalake import (
     write_deltalake,
 )
 from deltalake._internal import TableNotFoundError
+from deltalake.table import FilterConjunctionType
 from followthemoney import EntityProxy, StatementEntity, model
 from followthemoney.dataset.dataset import Dataset
 from followthemoney.statement import Statement
@@ -51,17 +51,19 @@ from sqlalchemy import Boolean, DateTime, column, select, table
 from sqlalchemy.sql import Select

 from ftmq.query import Query
-from ftmq.store.base import Store
+from ftmq.store.base import DEFAULT_ORIGIN, Store
 from ftmq.store.sql import SQLQueryView, SQLStore
 from ftmq.types import StatementEntities
 from ftmq.util import apply_dataset, ensure_entity, get_scope_dataset

 log = get_logger(__name__)

-Z_ORDER = ["canonical_id", "
+Z_ORDER = ["canonical_id", "prop"]  # don't add more columns here
 TARGET_SIZE = 50 * 10_485_760  # 500 MB
 PARTITION_BY = ["dataset", "bucket", "origin"]
-
+BUCKET_MENTION = "mention"
+BUCKET_PAGE = "page"
+BUCKET_PAGES = "pages"
 BUCKET_DOCUMENT = "document"
 BUCKET_INTERVAL = "interval"
 BUCKET_THING = "thing"
@@ -82,9 +84,15 @@ WRITER = WriterProperties(
         "schema": STATISTICS,
         "prop": STATISTICS_BLOOM,
         "value": STATISTICS_BLOOM,
+        "last_seen": ColumnProperties(statistics_enabled="CHUNK"),
     },
 )

+SA_TO_ARROW: dict[type, pa.DataType] = {
+    Boolean: pa.bool_(),
+    DateTime: pa.timestamp("us"),
+}
+
 TABLE = table(
     nks.STATEMENT_TABLE,
     column("id"),

@@ -93,6 +101,7 @@ TABLE = table(
     column("dataset"),
     column("bucket"),
     column("origin"),
+    column("source"),
     column("schema"),
     column("prop"),
     column("prop_type"),

@@ -104,6 +113,10 @@ TABLE = table(
     column("last_seen", DateTime),
 )

+ARROW_SCHEMA = pa.schema(
+    [(col.name, SA_TO_ARROW.get(type(col.type), pa.string())) for col in TABLE.columns]
+)
+

 class StorageSettings(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
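The new ARROW_SCHEMA is derived from the SQLAlchemy column definitions: typed columns resolve through SA_TO_ARROW, everything else falls back to strings. A small illustrative check of that fallback behaviour (assumes pyarrow and SQLAlchemy are installed; not part of the diff itself):

    import pyarrow as pa
    from sqlalchemy import Boolean, DateTime, String

    SA_TO_ARROW: dict[type, pa.DataType] = {
        Boolean: pa.bool_(),
        DateTime: pa.timestamp("us"),
    }

    # typed columns such as last_seen resolve via the mapping ...
    assert SA_TO_ARROW.get(DateTime, pa.string()) == pa.timestamp("us")
    # ... while untyped columns like column("dataset") default to strings
    assert SA_TO_ARROW.get(String, pa.string()) == pa.string()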
@@ -148,22 +161,26 @@ def storage_options() -> SDict:
 @cache
 def setup_duckdb_storage() -> None:
     if storage_settings.secret:
-        duckdb.query(
-            f"""CREATE OR REPLACE SECRET secret (
+        duckdb.query(f"""CREATE OR REPLACE SECRET secret (
             TYPE s3,
             PROVIDER config,
             KEY_ID '{storage_settings.key}',
             SECRET '{storage_settings.secret}',
-            ENDPOINT '{storage_settings.
+            ENDPOINT '{storage_settings.duckdb_endpoint}',
             URL_STYLE 'path',
             USE_SSL '{not storage_settings.allow_http}'
-        );"""
-        )
+        );""")


 @cache
 def get_schema_bucket(schema_name: str) -> str:
     s = model[schema_name]
+    if s.is_a("Mention"):
+        return BUCKET_MENTION
+    if s.is_a("Page"):
+        return BUCKET_PAGE
+    if s.is_a("Pages"):
+        return BUCKET_PAGES
     if s.is_a("Document"):
         return BUCKET_DOCUMENT
     if s.is_a("Interval"):

@@ -171,19 +188,13 @@ def get_schema_bucket(schema_name: str) -> str:
     return BUCKET_THING


-def pack_statement(stmt: Statement) -> SDict:
+def pack_statement(stmt: Statement, source: str | None = None) -> SDict:
     data = stmt.to_db_row()
     data["bucket"] = get_schema_bucket(data["schema"])
+    data["source"] = source
     return data


-def pack_statements(statements: Iterable[Statement]) -> pd.DataFrame:
-    df = pd.DataFrame(map(pack_statement, statements))
-    df = df.drop_duplicates()  # .sort_values(Z_ORDER)
-    df = df.fillna(np.nan)
-    return df
-
-
 def compile_query(q: Select) -> str:
     table = nks.STATEMENT_TABLE
     sql = str(q.compile(compile_kwargs={"literal_binds": True}))
@@ -237,15 +248,12 @@ class LakeQueryView(SQLQueryView):
         yield from super().query(query)


-class LakeStore(SQLStore):
+class LakeStore(SQLStore[LakeQueryView]):
     def __init__(self, *args, **kwargs) -> None:
-        self._backend
+        self._backend = FSStore(uri=kwargs.pop("uri"))
         self._partition_by = kwargs.pop("partition_by", PARTITION_BY)
         self._lock: Lock = kwargs.pop("lock", Lock(self._backend))
         self._enforce_dataset = kwargs.pop("enforce_dataset", False)
-        assert isinstance(
-            self._backend, FSStore
-        ), f"Invalid store backend: `{self._backend.__class__}"
         kwargs["uri"] = "sqlite:///:memory:"  # fake it till you make it
         get_metadata.cache_clear()
         super().__init__(*args, **kwargs)

@@ -275,12 +283,14 @@ class LakeStore(SQLStore):

     def view(
         self, scope: Dataset | None = None, external: bool = False
-    ) ->
+    ) -> LakeQueryView:
         scope = scope or self.dataset
         return LakeQueryView(self, scope, external)

-    def writer(
-
+    def writer(
+        self, origin: str | None = DEFAULT_ORIGIN, source: str | None = None
+    ) -> "LakeWriter":
+        return LakeWriter(self, origin=origin or DEFAULT_ORIGIN, source=source)

     def get_origins(self) -> set[str]:
         q = select(self.table.c.origin).distinct()
@@ -291,32 +301,55 @@ class LakeWriter(nk.Writer):
     store: LakeStore
     BATCH_STATEMENTS = 1_000_000

-    def __init__(
+    def __init__(
+        self,
+        store: Store,
+        origin: str | None = DEFAULT_ORIGIN,
+        source: str | None = None,
+    ):
         super().__init__(store)
-        self.batch:
+        self.batch: dict[str, tuple[Statement, str | None]] = {}
         self.origin = origin or DEFAULT_ORIGIN
+        self.source = source

-    def add_statement(self, stmt: Statement) -> None:
+    def add_statement(self, stmt: Statement, source: str | None = None) -> None:
         if stmt.entity_id is None:
             return
         stmt.origin = stmt.origin or self.origin
         canonical_id = self.store.linker.get_canonical(stmt.entity_id)
         stmt.canonical_id = canonical_id
-
-
-
+        key = f"{canonical_id}\t{stmt.id}"
+        self.batch[key] = (stmt, source or self.source)
+
+    def add_entity(
+        self,
+        entity: EntityProxy,
+        origin: str | None = None,
+        source: str | None = None,
+    ) -> None:
         e = ensure_entity(entity, StatementEntity, self.store.dataset)
         if self.store._enforce_dataset:
             e = apply_dataset(e, self.store.dataset, replace=True)
         for stmt in e.statements:
             if origin:
                 stmt.origin = origin
-            self.add_statement(stmt)
+            self.add_statement(stmt, source=source)
         # we check here instead of in `add_statement` as this will keep entities
-        # together in the same parquet files
+        # together in the same parquet files
         if len(self.batch) >= self.BATCH_STATEMENTS:
             self.flush()

+    def _pack_batches(self) -> Generator[pa.RecordBatch, None, None]:
+        batch: list[SDict] = []
+        for key in sorted(self.batch):
+            stmt, source = self.batch[key]
+            batch.append(pack_statement(stmt, source))
+            if len(batch) >= 100_000:
+                yield pa.RecordBatch.from_pylist(batch, schema=ARROW_SCHEMA)
+                batch = []
+        if batch:
+            yield pa.RecordBatch.from_pylist(batch, schema=ARROW_SCHEMA)
+
     def flush(self) -> None:
         if self.batch:
             log.info(

@@ -324,18 +357,22 @@ class LakeWriter(nk.Writer):
                 uri=self.store.uri,
             )
             with self.store._lock:
+                reader = pa.RecordBatchReader.from_batches(
+                    ARROW_SCHEMA, self._pack_batches()
+                )
                 write_deltalake(
                     str(self.store.uri),
-
+                    reader,
                     partition_by=self.store._partition_by,
                     mode="append",
                     schema_mode="merge",
                     writer_properties=WRITER,
                     target_file_size=TARGET_SIZE,
                     storage_options=storage_options(),
+                    configuration={"delta.enableChangeDataFeed": "true"},
                 )

-            self.batch =
+            self.batch = {}

     def pop(self, entity_id: str) -> list[Statement]:
         q = select(TABLE)
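Statements are no longer materialised into a pandas DataFrame; the writer now streams sorted record batches of at most 100,000 rows through a pyarrow RecordBatchReader into write_deltalake. A standalone sketch of that streaming pattern with a hypothetical toy schema (table path and column names are made up for illustration):

    from typing import Iterator

    import pyarrow as pa
    from deltalake import write_deltalake

    schema = pa.schema([("id", pa.string()), ("value", pa.string())])

    def batches(rows: list[dict], size: int = 100) -> Iterator[pa.RecordBatch]:
        # chunk rows into bounded record batches instead of one big in-memory table
        for start in range(0, len(rows), size):
            yield pa.RecordBatch.from_pylist(rows[start : start + size], schema=schema)

    rows = [{"id": str(i), "value": f"v{i}"} for i in range(250)]
    reader = pa.RecordBatchReader.from_batches(schema, batches(rows))
    write_deltalake("./example-delta-table", reader, mode="append")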
@@ -348,18 +385,42 @@ class LakeWriter(nk.Writer):
         return statements

     def optimize(
-        self,
+        self,
+        vacuum: bool | None = False,
+        vacuum_keep_hours: int | None = 0,
+        dataset: str | None = None,
+        bucket: str | None = None,
+        origin: str | None = None,
     ) -> None:
         """
         Optimize the storage: Z-Ordering and compacting
+
+        Args:
+            vacuum: Run vacuum after optimization
+            vacuum_keep_hours: Retention hours for vacuum
+            dataset: Filter optimization to specific dataset partition
+            bucket: Filter optimization to specific bucket partition
+            origin: Filter optimization to specific origin partition
         """
-
-
-
-        if
-
-
-
-
-
+        filters: FilterConjunctionType = []
+        if dataset is not None:
+            filters.append(("dataset", "=", dataset))
+        if bucket is not None:
+            filters.append(("bucket", "=", bucket))
+        if origin is not None:
+            filters.append(("origin", "=", origin))
+
+        with self.store._lock:
+            self.store.deltatable.optimize.z_order(
+                Z_ORDER,
+                writer_properties=WRITER,
+                target_size=TARGET_SIZE,
+                partition_filters=filters or None,
             )
+            if vacuum:
+                self.store.deltatable.vacuum(
+                    retention_hours=vacuum_keep_hours,
+                    enforce_retention_duration=False,
+                    dry_run=False,
+                    full=True,
+                )
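optimize() can now be scoped to single partitions and optionally vacuum stale files afterwards. A hypothetical call pattern (store setup elided; argument names follow the hunk above):

    writer = store.writer()  # LakeWriter for an already configured LakeStore
    writer.optimize(dataset="my_dataset")  # z-order and compact one dataset partition
    writer.optimize(vacuum=True, vacuum_keep_hours=0)  # full optimize, then vacuum everything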
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/store/sql.py RENAMED

@@ -1,12 +1,14 @@
 import os
 from collections import defaultdict
 from decimal import Decimal
+from typing import Generic

 from anystore.util import clean_dict
 from followthemoney.dataset.dataset import Dataset
 from nomenklatura.db import get_metadata
 from nomenklatura.store import sql as nk
 from sqlalchemy import select
+from typing_extensions import TypeVar

 from ftmq.aggregations import AggregatorResult
 from ftmq.enums import Fields

@@ -16,6 +18,8 @@ from ftmq.store.base import Store, View
 from ftmq.types import StatementEntities
 from ftmq.util import get_scope_dataset

+V = TypeVar("V", bound=View, default="SQLQueryView")
+
 MAX_SQL_AGG_GROUPS = int(os.environ.get("MAX_SQL_AGG_GROUPS", 10))


@@ -125,7 +129,7 @@ class SQLQueryView(View, nk.SQLView):
         return res


-class SQLStore(Store, nk.SQLStore):
+class SQLStore(Store[V], nk.SQLStore, Generic[V]):
     def __init__(self, *args, **kwargs) -> None:
         get_metadata.cache_clear()  # FIXME
         super().__init__(*args, **kwargs)

@@ -137,8 +141,6 @@ class SQLStore(Store, nk.SQLStore):
             names.add(row[0])
         return get_scope_dataset(*names)

-    def view(
-        self, scope: Dataset | None = None, external: bool = False
-    ) -> SQLQueryView:
+    def view(self, scope: Dataset | None = None, external: bool = False) -> V:
         scope = scope or self.dataset
-        return SQLQueryView(self, scope, external=external)
+        return SQLQueryView(self, scope, external=external)  # type: ignore[return-value]
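The view type is now a TypeVar with a PEP 696 default (via typing_extensions), so SQLStore used without a parameter type-checks as returning SQLQueryView, while subclasses such as LakeStore bind their own view class. A minimal sketch of the pattern, independent of ftmq's actual classes:

    from typing import Generic

    from typing_extensions import TypeVar

    class View: ...
    class DefaultView(View): ...
    class SpecialView(View): ...

    V = TypeVar("V", bound=View, default=DefaultView)

    class BaseStore(Generic[V]):
        def view(self) -> V: ...

    class SpecialStore(BaseStore[SpecialView]): ...

    # For a type checker: BaseStore().view() is a DefaultView,
    # SpecialStore().view() is a SpecialView.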
{ftmq-4.3.2 → ftmq-4.5.3}/ftmq/util.py RENAMED

@@ -1,7 +1,5 @@
-from functools import lru_cache
 from typing import Any, Generator, Type

-import pycountry
 from anystore.functools import weakref_cache as cache
 from anystore.types import SDict, StrGenerator
 from banal import ensure_list, is_listish

@@ -16,6 +14,7 @@ from followthemoney.util import make_entity_id, sanitize_text
 from normality import latinize_text, slugify, squash_spaces
 from rigour.names import Name, Symbol, tag_org_name, tag_person_name
 from rigour.names.tokenize import normalize_name
+from rigour.territories import lookup_territory
 from rigour.text.scripts import can_latinize

 from ftmq.enums import Comparators

@@ -149,7 +148,7 @@ def apply_dataset(entity: E, dataset: str | Dataset, replace: bool | None = Fals
 def get_country_name(code: str) -> str:
     """
     Get the (english) country name for the given 2-letter iso code via
-    [
+    [rigour.territories](https://rigour.followthemoney.tech/territories/)

     Examples:
         >>> get_country_name("de")

@@ -165,22 +164,17 @@ def get_country_name(code: str) -> str:
     Returns:
         Either the country name for a valid code or the code as fallback.
     """
-
-    if
-
-
-        country = pycountry.countries.get(alpha_2=code_clean)
-        if country is not None:
-            return country.name
-    except (LookupError, AttributeError):
-        return code
-    return code_clean
+    territory = lookup_territory(code)
+    if territory is not None:
+        return territory.name
+    return code


-@
+@cache
 def get_country_code(value: Any, splitter: str | None = ",") -> str | None:
     """
-    Get the 2-letter iso country code for an arbitrary country name
+    Get the 2-letter iso country code for an arbitrary country name via
+    [rigour.territories](https://rigour.followthemoney.tech/territories/)

     Examples:
         >>> get_country_code("Germany")

@@ -201,15 +195,16 @@ def get_country_code(value: Any, splitter: str | None = ",") -> str | None:
     """
     value = clean_string(value)
     if not value:
-        return
-
-    if
-        return
-
-
-
-
-
+        return None
+    territory = lookup_territory(value)
+    if territory is not None:
+        return territory.ftm_country
+    if splitter:
+        for token in value.split(splitter):
+            territory = lookup_territory(token.strip())
+            if territory is not None:
+                return territory.ftm_country
+    return None


 def join_slug(
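Country lookups now go through rigour.territories instead of pycountry. Expected behaviour following the docstring examples above (hedged: the exact return values depend on rigour's territory data):

    from ftmq.util import get_country_code, get_country_name

    assert get_country_name("de") == "Germany"
    assert get_country_name("xx") == "xx"  # unknown codes fall through unchanged
    assert get_country_code("Germany") == "de"
    assert get_country_code("not a country") is None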
{ftmq-4.3.2 → ftmq-4.5.3}/pyproject.toml RENAMED

@@ -1,6 +1,6 @@
 [project]
 name = "ftmq"
-version = "4.3.2"
+version = "4.5.3"
 description = "followthemoney query dsl and io helpers"
 authors = [{ name = "Simon Wörpel", email = "simon.woerpel@pm.me" }]
 license = "AGPLv3+"

@@ -12,19 +12,19 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    # "Programming Language :: Python :: 3.14",
 ]
-requires-python = ">=3.11,<
+requires-python = ">=3.11,<3.14"
 dependencies = [
-    "anystore (>=0.
-    "followthemoney (>=4.
-    "nomenklatura (>=4.1
-    "rigour (>=1.
+    "anystore (>=1.0.1,<2.0.0)",
+    "followthemoney (>=4.5.2,<5.0.0)",
+    "nomenklatura (>=4.6.1,<5.0.0)",
+    "rigour (>=1.6.2,<2.0.0)",
     "click (>=8.2.1,<9.0.0)",
     "click-default-group (>=1.2.4,<2.0.0)",
     "orjson (>=3.10.18,<4.0.0)",
     "pyicu (>=2.15.2,<3.0.0)",
     "pydantic (>=2.11.3,<3.0.0)",
-    "pycountry (>=24.6.1,<25.0.0)",
 ]

 [project.optional-dependencies]

@@ -33,10 +33,10 @@ sql = ["sqlalchemy (>=2.0.36,<3.0.0)"]
 postgres = ["sqlalchemy (>=2.0.36,<3.0.0)", "psycopg[pool] (>=3.2.9,<4.0.0)"]
 redis = ["redis (>=5.2.1,<6.0.0)", "fakeredis (>=2.26.2,<3.0.0)"]
 lake = [
-    "duckdb (>=1.4.
-    "
-    "
-    "
+    "duckdb (>=1.4.4,<2.0.0)",
+    "deltalake (>=1.4.1,<2.0.0)",
+    "pyarrow (>=23.0.0,<24.0.0)",
+    "pandas (>=3.0.0,<4.0.0)",
 ]
 aleph = ["furl (>=2.1.4,<3.0.0)", "alephclient (>=2.6.0,<3.0.0)"]

@@ -50,10 +50,10 @@ Repository = "https://github.com/dataresearchcenter/ftmq"
 Issues = "https://github.com/dataresearchcenter/ftmq/issues"

 [tool.poetry.group.dev.dependencies]
-pytest = ">=7.4.3,<
+pytest = ">=7.4.3,<10.0.0"
 pytest-cov = ">=4.1,<8.0"
 pytest-env = "^1.1.1"
-black = ">=23.11,<
+black = ">=23.11,<27.0"
 isort = "^7.0.0"
 mypy = "^1.17.1"
 pre-commit = "^4.0.1"
@@ -62,7 +62,7 @@ ipdb = "^0.13.13"
 bump2version = "^1.0.1"
 mkdocs = "^1.6.1"
 mkdocs-autorefs = "^1.4.3"
-mkdocstrings-python = "^
+mkdocstrings-python = "^2.0.0"
 mkdocs-material = "^9.6.18"
 mkdocs-click = "^0.9.0"
ftmq-4.3.2/ftmq/logging.py DELETED

@@ -1,105 +0,0 @@
-import logging
-import os
-import sys
-from logging import Filter, LogRecord
-from typing import Any, Dict, List
-
-import structlog
-from banal import as_bool
-from structlog.contextvars import merge_contextvars
-from structlog.dev import ConsoleRenderer, set_exc_info
-from structlog.processors import (
-    JSONRenderer,
-    TimeStamper,
-    UnicodeDecoder,
-    add_log_level,
-    format_exc_info,
-)
-from structlog.stdlib import (
-    BoundLogger,
-    LoggerFactory,
-    ProcessorFormatter,
-    add_logger_name,
-)
-from structlog.stdlib import get_logger as get_raw_logger
-
-LOG_JSON = as_bool(os.environ.get("LOG_JSON"))
-LOG_LEVEL = os.environ.get("LOG_LEVEL", "info").upper()
-
-
-def get_logger(name: str, *args, **kwargs) -> BoundLogger:
-    return get_raw_logger(name, *args, **kwargs)
-
-
-def configure_logging(level: int = logging.INFO) -> None:
-    """Configure log levels and structured logging"""
-    shared_processors: List[Any] = [
-        add_log_level,
-        add_logger_name,
-        # structlog.stdlib.PositionalArgumentsFormatter(),
-        # structlog.processors.StackInfoRenderer(),
-        merge_contextvars,
-        set_exc_info,
-        TimeStamper(fmt="iso"),
-        # format_exc_info,
-        UnicodeDecoder(),
-    ]
-
-    if LOG_JSON:
-        shared_processors.append(format_exc_info)
-        shared_processors.append(format_json)
-        formatter = ProcessorFormatter(
-            foreign_pre_chain=shared_processors,
-            processor=JSONRenderer(),
-        )
-    else:
-        formatter = ProcessorFormatter(
-            foreign_pre_chain=shared_processors,
-            processor=ConsoleRenderer(
-                exception_formatter=structlog.dev.plain_traceback
-            ),
-        )
-
-    processors = shared_processors + [
-        ProcessorFormatter.wrap_for_formatter,
-    ]
-
-    # configuration for structlog based loggers
-    structlog.configure(
-        cache_logger_on_first_use=True,
-        # wrapper_class=AsyncBoundLogger,
-        wrapper_class=BoundLogger,
-        processors=processors,
-        context_class=dict,
-        logger_factory=LoggerFactory(),
-    )
-
-    # handler for low level logs that should be sent to STDERR
-    out_handler = logging.StreamHandler(sys.stderr)
-    out_handler.setLevel(level)
-    out_handler.addFilter(_MaxLevelFilter(logging.WARNING))
-    out_handler.setFormatter(formatter)
-    # handler for high level logs that should be sent to STDERR
-    error_handler = logging.StreamHandler(sys.stderr)
-    error_handler.setLevel(logging.ERROR)
-    error_handler.setFormatter(formatter)
-
-    root_logger = logging.getLogger()
-    root_logger.setLevel(LOG_LEVEL)
-    root_logger.addHandler(out_handler)
-    root_logger.addHandler(error_handler)
-
-
-def format_json(_: Any, __: Any, ed: Dict[str, str]) -> Dict[str, str]:
-    """Stackdriver uses `message` and `severity` keys to display logs"""
-    ed["message"] = ed.pop("event")
-    ed["severity"] = ed.pop("level", "info").upper()
-    return ed
-
-
-class _MaxLevelFilter(Filter):
-    def __init__(self, highest_log_level: int) -> None:
-        self._highest_log_level = highest_log_level
-
-    def filter(self, log_record: LogRecord) -> bool:
-        return log_record.levelno <= self._highest_log_level

All remaining files (LICENSE, NOTICE, README.md, and the other entries listed above with +0 -0) were renamed from ftmq-4.3.2/ to ftmq-4.5.3/ without content changes.