anysite-cli 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anysite/dataset/cli.py +10 -1
- anysite/dataset/collector.py +4 -3
- anysite/dataset/db_loader.py +166 -23
- anysite/dataset/differ.py +203 -48
- anysite/dataset/models.py +5 -0
- anysite/dataset/storage.py +21 -1
- {anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/METADATA +14 -3
- {anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/RECORD +11 -11
- {anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/WHEEL +0 -0
- {anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/entry_points.txt +0 -0
- {anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/licenses/LICENSE +0 -0
anysite/dataset/cli.py
CHANGED

@@ -357,6 +357,10 @@ def load_db(
         bool,
         typer.Option("--quiet", "-q", help="Suppress progress output"),
     ] = False,
+    snapshot: Annotated[
+        str | None,
+        typer.Option("--snapshot", help="Load a specific snapshot date (YYYY-MM-DD)"),
+    ] = None,
 ) -> None:
     """Load collected Parquet data into a relational database with FK linking."""
     config = _load_config(config_path)
@@ -379,6 +383,7 @@ def load_db(
             source_filter=source,
             drop_existing=drop_existing,
             dry_run=dry_run,
+            snapshot=snapshot,
         )
     except Exception as e:
         typer.echo(f"Load error: {e}", err=True)
@@ -519,7 +524,11 @@ def diff_cmd(
         return

     # Format and output
-    rows =
+    rows = (
+        format_diff_table(result, output_fields=field_list)
+        if format == "table"
+        else format_diff_records(result, output_fields=field_list)
+    )

     _output_results(rows, format, output)

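For reference, the --snapshot flag added above follows the standard Typer Annotated option pattern: a keyword-only parameter whose metadata declares the flag, defaulting to None when omitted. A minimal, self-contained sketch of that pattern (the command below is illustrative only, not part of anysite-cli):

    from typing import Annotated

    import typer

    app = typer.Typer()


    @app.command()
    def load_db(
        snapshot: Annotated[
            str | None,
            typer.Option("--snapshot", help="Load a specific snapshot date (YYYY-MM-DD)"),
        ] = None,
    ) -> None:
        # Typer parses "--snapshot 2026-01-15" into the string "2026-01-15";
        # leaving the flag off keeps snapshot as None, which selects the default
        # (latest snapshot / diff-based) behaviour downstream.
        typer.echo(f"snapshot={snapshot}")


    if __name__ == "__main__":
        app()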
anysite/dataset/collector.py
CHANGED

@@ -19,6 +19,7 @@ from anysite.dataset.models import DatasetConfig, DatasetSource
 from anysite.dataset.storage import (
     MetadataStore,
     get_parquet_path,
+    read_latest_parquet,
     read_parquet,
     write_parquet,
 )
@@ -412,9 +413,9 @@ async def _collect_dependent(
     if dep is None:
         raise DatasetError(f"Source {source.id} has no dependency defined")

-    # Read parent data
+    # Read parent data (latest snapshot only to avoid schema mismatch)
     parent_dir = base_path / "raw" / dep.from_source
-    parent_records =
+    parent_records = read_latest_parquet(parent_dir)

     if not parent_records:
         if not quiet:
@@ -627,7 +628,7 @@ def _count_dependent_inputs(
    if dep is None:
        return None
    parent_dir = base_path / "raw" / dep.from_source
-    parent_records =
+    parent_records = read_latest_parquet(parent_dir)
    if not parent_records:
        info = metadata.get_source_info(dep.from_source)
        return info.get("record_count") if info else None
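The new comment names the motivation: reading every dated Parquet file in the parent directory can fail when an older snapshot was written with different column types. A rough, self-contained illustration of the failure mode that read_latest_parquet sidesteps, using toy tables rather than anysite data (exact exception wording varies by pyarrow version):

    import pyarrow as pa

    # Two "snapshots" of the same source where a column changed type between runs.
    old_snapshot = pa.table({"employee_count": pa.array([10, 20], type=pa.int64())})
    new_snapshot = pa.table({"employee_count": pa.array(["10-50", "50-200"], type=pa.string())})

    try:
        # Default concatenation requires compatible schemas across all files.
        pa.concat_tables([old_snapshot, new_snapshot])
    except pa.ArrowInvalid as exc:
        print(f"schema mismatch: {exc}")

Reading only the newest file avoids mixing incompatible snapshots in the first place.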
anysite/dataset/db_loader.py
CHANGED

@@ -3,12 +3,18 @@
 from __future__ import annotations

 import json
+import logging
+from datetime import date
+from pathlib import Path
 from typing import Any

 from anysite.dataset.models import DatasetConfig, DatasetSource
 from anysite.dataset.storage import get_source_dir, read_parquet
 from anysite.db.adapters.base import DatabaseAdapter
 from anysite.db.schema.inference import infer_table_schema
+from anysite.db.utils.sanitize import sanitize_identifier
+
+logger = logging.getLogger(__name__)


 def _get_dialect(adapter: DatabaseAdapter) -> str:
@@ -86,15 +92,31 @@ def _filter_record(
     return {k: v for k, v in record.items() if k not in exclude}


+def _get_latest_parquet(base_path: Path, source_id: str) -> Path | None:
+    """Return the path to the most recent snapshot for a source."""
+    source_dir = get_source_dir(base_path, source_id)
+    if not source_dir.exists():
+        return None
+    files = sorted(source_dir.glob("*.parquet"))
+    return files[-1] if files else None
+
+
+def _get_snapshot_for_date(base_path: Path, source_id: str, d: date) -> Path | None:
+    """Return the parquet path for a specific snapshot date."""
+    source_dir = get_source_dir(base_path, source_id)
+    path = source_dir / f"{d.isoformat()}.parquet"
+    return path if path.exists() else None
+
+
 class DatasetDbLoader:
     """Load dataset Parquet data into a relational database.

-
-
-
-
-
-
+    Supports diff-based incremental sync when ``db_load.key`` is configured:
+    compares the two most recent snapshots and applies INSERT/DELETE/UPDATE
+    to keep the database in sync.
+
+    Falls back to full INSERT of the latest snapshot when no key is set
+    or when the table doesn't exist yet.
     """

     def __init__(
@@ -115,16 +137,18 @@ class DatasetDbLoader:
         source_filter: str | None = None,
         drop_existing: bool = False,
         dry_run: bool = False,
+        snapshot: str | None = None,
     ) -> dict[str, int]:
         """Load all sources into the database in dependency order.

         Args:
             source_filter: Only load this source (and dependencies).
-            drop_existing: Drop tables before creating.
+            drop_existing: Drop tables before creating, then full INSERT latest.
             dry_run: Show plan without executing.
+            snapshot: Load a specific snapshot date (YYYY-MM-DD).

         Returns:
-            Mapping of source_id to number of rows loaded.
+            Mapping of source_id to number of rows loaded/affected.
         """
         sources = self.config.topological_sort()

@@ -139,6 +163,7 @@ class DatasetDbLoader:
                 source,
                 drop_existing=drop_existing,
                 dry_run=dry_run,
+                snapshot=snapshot,
             )
             results[source.id] = count

@@ -150,18 +175,64 @@ class DatasetDbLoader:
         *,
         drop_existing: bool = False,
         dry_run: bool = False,
+        snapshot: str | None = None,
     ) -> int:
-        """Load a single source into the database.
-
-
+        """Load a single source into the database.
+
+        Strategy:
+        1. ``drop_existing``: drop table → full INSERT of latest snapshot
+        2. ``snapshot``: full INSERT of that specific snapshot
+        3. Table doesn't exist: full INSERT of latest snapshot
+        4. Table exists + ``db_load.key`` set + ≥2 snapshots: diff-based sync
+        5. Fallback: full INSERT of latest snapshot
+        """
+        table_name = _table_name_for(source)
+
+        # Handle drop_existing
+        if drop_existing and self.adapter.table_exists(table_name):
+            self.adapter.execute(f"DROP TABLE {table_name}")
+
+        # Determine which parquet to load
+        if snapshot:
+            snapshot_date = date.fromisoformat(snapshot)
+            parquet_path = _get_snapshot_for_date(self.base_path, source.id, snapshot_date)
+            if parquet_path is None:
+                return 0
+            return self._full_insert(source, table_name, parquet_path, dry_run=dry_run)
+
+        # Check if we can do diff-based sync
+        diff_key = source.db_load.key if source.db_load else None
+        table_exists = self.adapter.table_exists(table_name)
+
+        if diff_key and table_exists and not drop_existing:
+            from anysite.dataset.differ import DatasetDiffer
+            differ = DatasetDiffer(self.base_path)
+            dates = differ.available_dates(source.id)
+
+            if len(dates) >= 2:
+                return self._diff_sync(
+                    source, table_name, diff_key, differ, dates, dry_run=dry_run
+                )
+
+        # Fallback: full INSERT of latest snapshot
+        latest = _get_latest_parquet(self.base_path, source.id)
+        if latest is None:
            return 0
+        return self._full_insert(source, table_name, latest, dry_run=dry_run)

-
+    def _full_insert(
+        self,
+        source: DatasetSource,
+        table_name: str,
+        parquet_path: Path,
+        *,
+        dry_run: bool = False,
+    ) -> int:
+        """Full INSERT: read parquet, transform, create table if needed, insert all rows."""
+        raw_records = read_parquet(parquet_path)
         if not raw_records:
             return 0

-        table_name = _table_name_for(source)
-
         # Determine parent info for FK linking
         parent_source_id = None
         parent_fk_col = None
@@ -174,7 +245,6 @@ class DatasetDbLoader:
         for record in raw_records:
             row = _filter_record(record, source)

-            # Add FK column if this is a dependent source
             if parent_source_id and parent_fk_col:
                 input_val = record.get("_input_value")
                 parent_map = self._value_to_id.get(parent_source_id, {})
@@ -189,17 +259,12 @@ class DatasetDbLoader:
             return len(rows)

         # Determine the lookup field for children to reference this source
-        # This is the field that child dependencies extract from this source
         lookup_field = self._get_child_lookup_field(source)

-        # Create table
-        if drop_existing and self.adapter.table_exists(table_name):
-            self.adapter.execute(f"DROP TABLE {table_name}")
-
+        # Create table if needed
         if not self.adapter.table_exists(table_name):
             schema = infer_table_schema(table_name, rows)
             sql_types = schema.to_sql_types(self._dialect)
-            # Add auto-increment id column
             col_defs = {"id": self._auto_id_type()}
             col_defs.update(sql_types)
             self.adapter.create_table(table_name, col_defs, primary_key="id")
@@ -208,10 +273,8 @@ class DatasetDbLoader:
         value_map: dict[str, int] = {}
         for i, row in enumerate(rows):
             self.adapter.insert_batch(table_name, [row])
-            # Get the last inserted id
             last_id = self._get_last_id(table_name)

-            # Build value→id map for child sources
             if lookup_field and last_id is not None:
                 raw_record = raw_records[i]
                 lookup_val = _extract_dot_value(raw_record, lookup_field)
@@ -225,6 +288,86 @@ class DatasetDbLoader:

         return len(rows)

+    def _diff_sync(
+        self,
+        source: DatasetSource,
+        table_name: str,
+        diff_key: str,
+        differ: Any,
+        dates: list[date],
+        *,
+        dry_run: bool = False,
+    ) -> int:
+        """Diff-based incremental sync: compare two most recent snapshots, apply delta."""
+        result = differ.diff(source.id, diff_key)
+        total = 0
+        sync_mode = source.db_load.sync if source.db_load else "full"
+
+        if dry_run:
+            count = len(result.added) + len(result.changed)
+            if sync_mode == "full":
+                count += len(result.removed)
+            return count
+
+        # Extract key value from a record (handles dot-notation)
+        def _get_key_val(record: dict[str, Any]) -> Any:
+            if "." in diff_key:
+                return _extract_dot_value(record, diff_key)
+            return record.get(diff_key)
+
+        # Determine the DB column name for the key
+        db_key_col = diff_key.replace(".", "_")
+
+        # INSERT added records
+        if result.added:
+            for record in result.added:
+                row = _filter_record(record, source)
+                self.adapter.insert_batch(table_name, [row])
+                total += 1
+
+        # DELETE removed records (skipped in append mode)
+        if result.removed and sync_mode == "full":
+            safe_col = sanitize_identifier(db_key_col)
+            for record in result.removed:
+                key_val = _get_key_val(record)
+                if key_val is not None:
+                    self.adapter.execute(
+                        f"DELETE FROM {table_name} WHERE {safe_col} = ?",
+                        (str(key_val),),
+                    )
+                    total += 1
+
+        # UPDATE changed records
+        if result.changed:
+            safe_col = sanitize_identifier(db_key_col)
+            for record in result.changed:
+                key_val = _get_key_val(record)
+                if key_val is None:
+                    continue
+                changed_fields = record.get("_changed_fields", [])
+                if not changed_fields:
+                    continue
+
+                # Build SET clause from changed fields
+                set_parts = []
+                params: list[Any] = []
+                for field_name in changed_fields:
+                    new_val = record.get(field_name)
+                    safe_field = sanitize_identifier(field_name)
+                    set_parts.append(f"{safe_field} = ?")
+                    params.append(new_val)
+
+                params.append(str(key_val))
+                sql = (
+                    f"UPDATE {table_name} "
+                    f"SET {', '.join(set_parts)} "
+                    f"WHERE {safe_col} = ?"
+                )
+                self.adapter.execute(sql, tuple(params))
+                total += 1
+
+        return total
+
     def _get_child_lookup_field(self, source: DatasetSource) -> str | None:
         """Find which field children use to reference this source."""
         for other in self.config.sources:
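To make the new diff-sync write path concrete: for each changed record, the loader builds a parameterized UPDATE whose SET clause contains only the fields the differ flagged, keyed on the configured diff key (identifiers go through sanitize_identifier in the real code). A minimal standalone sketch of that construction, with illustrative helper, table, and column names that are not part of the module's API:

    from typing import Any


    def build_update(table: str, key_col: str, key_val: Any,
                     changed: dict[str, Any]) -> tuple[str, tuple[Any, ...]]:
        # One "column = ?" fragment per changed field; the key value is bound
        # last so it lines up with the WHERE placeholder.
        set_parts = [f"{col} = ?" for col in changed]
        params = [*changed.values(), str(key_val)]
        sql = f"UPDATE {table} SET {', '.join(set_parts)} WHERE {key_col} = ?"
        return sql, tuple(params)


    sql, params = build_update(
        "employees", "urn_value", "urn:li:123", {"headline": "Staff Engineer"}
    )
    print(sql)     # UPDATE employees SET headline = ? WHERE urn_value = ?
    print(params)  # ('Staff Engineer', 'urn:li:123')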
anysite/dataset/differ.py
CHANGED

@@ -12,6 +12,31 @@ from anysite.dataset.errors import DatasetError
 from anysite.dataset.storage import get_source_dir


+def _build_key_expr(key: str, all_columns: list[str]) -> tuple[str, str]:
+    """Build a DuckDB key expression, supporting dot-notation for JSON fields.
+
+    Returns:
+        (key_expr, key_alias) — the SQL expression and a display alias.
+        For simple keys: ('"field"', 'field')
+        For dot-notation: ("json_extract_string(\"urn\", '$.value')", 'urn.value')
+    """
+    if "." not in key:
+        if key not in all_columns:
+            raise DatasetError(
+                f"Key field '{key}' not found. "
+                f"Available: {', '.join(all_columns)}"
+            )
+        return f'"{key}"', key
+
+    root, rest = key.split(".", 1)
+    if root not in all_columns:
+        raise DatasetError(
+            f"Root field '{root}' (from key '{key}') not found. "
+            f"Available: {', '.join(all_columns)}"
+        )
+    return f"json_extract_string(\"{root}\", '$.{rest}')", key
+
+
 @dataclass
 class DiffResult:
     """Result of comparing two dataset snapshots."""
@@ -24,6 +49,7 @@ class DiffResult:
     removed: list[dict[str, Any]] = field(default_factory=list)
     changed: list[dict[str, Any]] = field(default_factory=list)
     unchanged_count: int = 0
+    fields: list[str] | None = field(default=None)

     @property
     def has_changes(self) -> bool:
@@ -63,10 +89,11 @@ class DatasetDiffer:

         Args:
             source_id: Source to compare.
-            key: Field to match records by
+            key: Field to match records by. Supports dot-notation for
+                JSON fields (e.g., ``urn.value``).
             from_date: Older snapshot date (default: second-to-last).
             to_date: Newer snapshot date (default: latest).
-            fields: Only compare these fields (default: all).
+            fields: Only compare (and output) these fields (default: all).

         Returns:
             DiffResult with added, removed, changed lists.
@@ -153,50 +180,43 @@ class DatasetDiffer:
             info = conn.execute("DESCRIBE _new").fetchall()
             all_columns = [col[0] for col in info]

-
-
-                    f"Key field '{key}' not found in {source_id}. "
-                    f"Available: {', '.join(all_columns)}"
-                )
+            # Build key expression (supports dot-notation)
+            key_expr, key_alias = _build_key_expr(key, all_columns)

             # Determine which fields to compare
             compare_fields = fields if fields else [
-                c for c in all_columns if c != key
+                c for c in all_columns if c != key and c != key.split(".")[0]
             ]
             # Filter to fields that actually exist
             compare_fields = [c for c in compare_fields if c in all_columns]

-
+            # Determine output columns: if fields specified, restrict to key + fields
+            if fields:
+                output_columns = [key_alias] + [
+                    f for f in fields if f in all_columns
+                ]
+            else:
+                output_columns = None  # all columns

             # Added: in new but not in old
-
-
-
-            ).fetchall()
-            added_cols = [d[0] for d in conn.execute(
-                "DESCRIBE _new"
-            ).fetchall()]
-            added_records = [dict(zip(added_cols, row, strict=False)) for row in added]
+            added_records = self._query_added_removed(
+                conn, "_new", "_old", key_expr, key_alias, all_columns, output_columns
+            )

             # Removed: in old but not in new
-
-
-
-            ).fetchall()
-            removed_cols = [d[0] for d in conn.execute(
-                "DESCRIBE _old"
-            ).fetchall()]
-            removed_records = [dict(zip(removed_cols, row, strict=False)) for row in removed]
+            removed_records = self._query_added_removed(
+                conn, "_old", "_new", key_expr, key_alias, all_columns, output_columns
+            )

             # Changed: matching key, different values
             changed_records = self._find_changed(
-                conn,
+                conn, key_expr, key_alias, compare_fields, all_columns, output_columns
             )

             # Count unchanged
             total_matched = conn.execute(
                 f"SELECT COUNT(*) FROM _new n "
-                f"JOIN _old o ON n
+                f"JOIN _old o ON ({_requalify(key_expr, 'n')}) = ({_requalify(key_expr, 'o')})"
             ).fetchone()
             matched_count = total_matched[0] if total_matched else 0
             unchanged_count = matched_count - len(changed_records)
@@ -210,23 +230,59 @@ class DatasetDiffer:
                 removed=removed_records,
                 changed=changed_records,
                 unchanged_count=unchanged_count,
+                fields=fields,
             )
         finally:
             conn.close()

+    @staticmethod
+    def _query_added_removed(
+        conn: Any,
+        present_view: str,
+        absent_view: str,
+        key_expr: str,
+        key_alias: str,
+        all_columns: list[str],
+        output_columns: list[str] | None,
+    ) -> list[dict[str, Any]]:
+        """Query records present in one view but not the other."""
+        # Build SELECT list
+        if output_columns:
+            select_parts = []
+            for col in output_columns:
+                if col == key_alias and "." in col:
+                    select_parts.append(f"{key_expr} AS \"{key_alias}\"")
+                else:
+                    select_parts.append(f'"{col}"')
+            select_clause = ", ".join(select_parts)
+        else:
+            if "." in key_alias:
+                select_clause = f"*, {key_expr} AS \"{key_alias}\""
+            else:
+                select_clause = "*"
+
+        sql = (
+            f"SELECT {select_clause} FROM {present_view} "
+            f"WHERE ({key_expr}) NOT IN (SELECT ({key_expr}) FROM {absent_view})"
+        )
+        result = conn.execute(sql)
+        columns = [desc[0] for desc in result.description]
+        rows = result.fetchall()
+        return [dict(zip(columns, row, strict=False)) for row in rows]
+
+    @staticmethod
     def _find_changed(
-        self,
         conn: Any,
-
+        key_expr: str,
+        key_alias: str,
         compare_fields: list[str],
         all_columns: list[str],
+        output_columns: list[str] | None,
     ) -> list[dict[str, Any]]:
         """Find records that exist in both snapshots but have different values."""
         if not compare_fields:
             return []

-        quoted_key = f'"{key}"'
-
         # Build WHERE clause: any compared field differs
         where_parts = []
         for col in compare_fields:
@@ -234,21 +290,43 @@ class DatasetDiffer:
             where_parts.append(f"n.{qc} IS DISTINCT FROM o.{qc}")
         where_clause = " OR ".join(where_parts)

-        #
-
-
-
-
-
-
-        select_parts
+        # Build JOIN condition
+        join_key_n = _requalify(key_expr, "n")
+        join_key_o = _requalify(key_expr, "o")
+        join_cond = f"({join_key_n}) = ({join_key_o})"
+
+        # Build SELECT: key + output fields + __old for compare fields
+        if output_columns:
+            # Restricted output
+            select_parts = []
+            for col in output_columns:
+                if col == key_alias and "." in col:
+                    select_parts.append(f"{_requalify(key_expr, 'n')} AS \"{key_alias}\"")
+                else:
+                    select_parts.append(f"n.\"{col}\"")
+            for col in compare_fields:
+                # Include __old for compare fields that are in output
+                if col in [c for c in output_columns if c != key_alias]:
+                    select_parts.append(f"o.\"{col}\" AS \"{col}__old\"")
+        else:
+            # Full output
+            select_parts = []
+            if "." in key_alias:
+                select_parts.append(f"{_requalify(key_expr, 'n')} AS \"{key_alias}\"")
+            else:
+                select_parts.append(f"n.\"{key_alias}\"")
+            for col in all_columns:
+                if col == key_alias:
+                    continue
+                select_parts.append(f"n.\"{col}\"")
+            for col in compare_fields:
+                select_parts.append(f"o.\"{col}\" AS \"{col}__old\"")

         select_clause = ", ".join(select_parts)

         sql = (
             f"SELECT {select_clause} FROM _new n "
-            f"JOIN _old o ON
+            f"JOIN _old o ON {join_cond} "
             f"WHERE {where_clause}"
         )
@@ -266,11 +344,32 @@ class DatasetDiffer:
                 old_val = record.get(old_key)
                 if _values_differ(new_val, old_val):
                     changed_fields.append(col)
+            # Fallback: DuckDB detected a change but Python comparison missed it
+            if not changed_fields:
+                changed_fields = list(compare_fields)
             record["_changed_fields"] = changed_fields

         return records


+def _requalify(key_expr: str, prefix: str) -> str:
+    """Requalify a key expression with a table alias prefix.
+
+    For simple keys like '"field"', returns 'prefix."field"'.
+    For json_extract_string("col", '$.path'), returns
+    json_extract_string(prefix."col", '$.path').
+    """
+    if key_expr.startswith("json_extract_string("):
+        # Replace the column reference inside json_extract_string
+        inner = key_expr[len("json_extract_string("):]
+        # inner looks like: "col", '$.path')
+        col_end = inner.index(",")
+        col = inner[:col_end].strip()
+        rest = inner[col_end:]
+        return f"json_extract_string({prefix}.{col}{rest}"
+    return f"{prefix}.{key_expr}"
+
+
 def _values_differ(a: Any, b: Any) -> bool:
     """Compare two values, treating JSON strings as equivalent to their parsed form."""
     if a == b:
@@ -281,24 +380,42 @@ def _values_differ(a: Any, b: Any) -> bool:
             return json.loads(a) != json.loads(b)
         except (json.JSONDecodeError, ValueError):
             pass
+    # Handle complex types (dict, list) — compare via JSON serialization
+    # to catch differences DuckDB sees but Python equality misses
+    if isinstance(a, (dict, list)) or isinstance(b, (dict, list)):
+        try:
+            return json.dumps(a, sort_keys=True, default=str) != json.dumps(
+                b, sort_keys=True, default=str
+            )
+        except (TypeError, ValueError):
+            pass
     return True


-def format_diff_table(
+def format_diff_table(
+    result: DiffResult,
+    *,
+    output_fields: list[str] | None = None,
+) -> list[dict[str, Any]]:
     """Format a DiffResult into a flat list of dicts for table/json output.

     Each record gets a ``_diff`` column with value ``added``, ``removed``,
     or ``changed``. For changed records in table mode, modified field
     values are formatted as ``old → new``.
+
+    Args:
+        result: The diff result.
+        output_fields: If set, only include these fields (plus ``_diff`` and key).
     """
+    allowed = _build_allowed_set(result.key, output_fields)
     rows: list[dict[str, Any]] = []

     for record in result.added:
-        row = {"_diff": "added", **record}
+        row = {"_diff": "added", **_filter_row(record, allowed)}
         rows.append(row)

     for record in result.removed:
-        row = {"_diff": "removed", **record}
+        row = {"_diff": "removed", **_filter_row(record, allowed)}
         rows.append(row)

     for record in result.changed:
@@ -309,6 +426,8 @@ def format_diff_table(result: DiffResult) -> list[dict[str, Any]]:
                 continue
             if k.endswith("__old"):
                 continue
+            if allowed and k not in allowed:
+                continue
             # For changed fields, format as "old → new"
             if k in changed_fields:
                 old_val = record.get(f"{k}__old")
@@ -320,31 +439,67 @@ def format_diff_table(result: DiffResult) -> list[dict[str, Any]]:
     return rows


-def format_diff_records(
+def format_diff_records(
+    result: DiffResult,
+    *,
+    output_fields: list[str] | None = None,
+) -> list[dict[str, Any]]:
     """Format a DiffResult for JSON/CSV output.

     Each record gets ``_diff`` column. Changed records include both
     current values and ``field__old`` columns.
+
+    Args:
+        result: The diff result.
+        output_fields: If set, only include these fields (plus ``_diff``, key, and ``__old``).
     """
+    allowed = _build_allowed_set(result.key, output_fields)
     rows: list[dict[str, Any]] = []

     for record in result.added:
-        rows.append({"_diff": "added", **record})
+        rows.append({"_diff": "added", **_filter_row(record, allowed)})

     for record in result.removed:
-        rows.append({"_diff": "removed", **record})
+        rows.append({"_diff": "removed", **_filter_row(record, allowed)})

     for record in result.changed:
-        row = {"_diff": "changed"}
+        row: dict[str, Any] = {"_diff": "changed"}
+        changed_fields = record.get("_changed_fields", [])
+        row["_changed_fields"] = changed_fields
         for k, v in record.items():
             if k == "_changed_fields":
                 continue
+            if allowed and k not in allowed and not k.endswith("__old"):
+                continue
+            if k.endswith("__old") and allowed:
+                base = k[: -len("__old")]
+                if base not in allowed:
+                    continue
             row[k] = v
         rows.append(row)

     return rows


+def _build_allowed_set(key: str, output_fields: list[str] | None) -> set[str] | None:
+    """Build the set of allowed field names for output filtering."""
+    if not output_fields:
+        return None
+    allowed = set(output_fields)
+    allowed.add(key)
+    # Also add the root column for dot-notation keys
+    if "." in key:
+        allowed.add(key.split(".")[0])
+    return allowed
+
+
+def _filter_row(record: dict[str, Any], allowed: set[str] | None) -> dict[str, Any]:
+    """Filter a record to only allowed fields."""
+    if not allowed:
+        return record
+    return {k: v for k, v in record.items() if k in allowed}
+
+
 def _format_val(v: Any) -> str:
     """Format a value for display, truncating long strings."""
     if v is None:
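The dot-notation keys lean on DuckDB's json_extract_string, which is the expression _build_key_expr emits for a key such as urn.value. A small, self-contained check of that expression against toy data (in-memory DuckDB; the table and values are made up for illustration, and the JSON extension is assumed to be available, as it is by autoload in recent DuckDB releases):

    import duckdb

    conn = duckdb.connect()
    conn.execute("""
        CREATE TABLE _new AS
        SELECT * FROM (VALUES
            ('{"value": "urn:li:1"}', 'Alice'),
            ('{"value": "urn:li:2"}', 'Bob')
        ) AS t(urn, name)
    """)

    # The expression built for key "urn.value": json_extract_string("urn", '$.value')
    rows = conn.execute(
        'SELECT json_extract_string("urn", \'$.value\') AS "urn.value", name FROM _new'
    ).fetchall()
    print(rows)  # [('urn:li:1', 'Alice'), ('urn:li:2', 'Bob')]

_requalify then rewrites the same expression with an "n." or "o." alias so it can be used on both sides of the snapshot join.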
anysite/dataset/models.py
CHANGED

@@ -81,6 +81,11 @@ class DbLoadConfig(BaseModel):
     """Configuration for loading a source into a relational database."""

     table: str | None = Field(default=None, description="Override table name (default: source id)")
+    key: str | None = Field(default=None, description="Unique key field for diff-based DB sync (e.g., urn.value)")
+    sync: Literal["full", "append"] = Field(
+        default="full",
+        description="Sync mode: 'full' applies INSERT/DELETE/UPDATE, 'append' skips DELETE (keeps old records)",
+    )
     fields: list[str] = Field(default_factory=list, description="Fields to include (empty = all)")
     exclude: list[str] = Field(
         default_factory=lambda: ["_input_value", "_parent_source"],
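A self-contained approximation of how the two new DbLoadConfig fields behave under pydantic validation (trimmed to just the fields shown in this hunk, and using a stand-in class name; the real model carries more fields):

    from typing import Literal

    from pydantic import BaseModel, Field, ValidationError


    class DbLoadConfigSketch(BaseModel):
        # Mirrors the 0.1.5 additions: an optional diff key plus a constrained sync mode.
        key: str | None = Field(default=None)
        sync: Literal["full", "append"] = Field(default="full")


    print(DbLoadConfigSketch(key="urn.value", sync="append"))
    # key='urn.value' sync='append'

    try:
        DbLoadConfigSketch(sync="upsert")  # rejected: not one of the allowed literals
    except ValidationError as exc:
        print(len(exc.errors()), "validation error")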
anysite/dataset/storage.py
CHANGED

@@ -75,7 +75,7 @@ def read_parquet(path: Path) -> list[dict[str, Any]]:
         tables = [pq.read_table(f) for f in files]
         import pyarrow as pa

-        table = pa.concat_tables(tables)
+        table = pa.concat_tables(tables, promote_options="permissive")
     else:
         if not path.exists():
             return []
@@ -84,6 +84,26 @@ def read_parquet(path: Path) -> list[dict[str, Any]]:
     return table.to_pylist()


+def read_latest_parquet(path: Path) -> list[dict[str, Any]]:
+    """Read records from the most recent Parquet snapshot in a directory.
+
+    Unlike ``read_parquet(dir)``, this reads only the latest file, avoiding
+    schema mismatch errors when snapshots have different column types.
+
+    Args:
+        path: Directory containing dated .parquet files.
+
+    Returns:
+        List of dicts from the newest snapshot, or [] if none found.
+    """
+    if not path.is_dir():
+        return read_parquet(path)
+    files = sorted(path.glob("*.parquet"))
+    if not files:
+        return []
+    return read_parquet(files[-1])
+
+
 def get_source_dir(base_path: Path, source_id: str) -> Path:
     """Get the raw data directory for a source."""
     return base_path / "raw" / source_id
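The promote_options="permissive" switch lets pyarrow unify snapshot schemas that drifted between runs instead of failing outright, for example when a column is missing from one file or needs a type promotion. A tiny demonstration with toy tables, under the assumption of pyarrow 14+ where the promote_options parameter is available:

    import pyarrow as pa

    # One snapshot has a column the other lacks.
    t1 = pa.table({"name": ["Acme"], "employee_count": [42]})
    t2 = pa.table({"name": ["Globex"]})

    merged = pa.concat_tables([t1, t2], promote_options="permissive")
    print(merged.to_pylist())
    # [{'name': 'Acme', 'employee_count': 42}, {'name': 'Globex', 'employee_count': None}]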
{anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: anysite-cli
-Version: 0.1.3
+Version: 0.1.5
 Summary: CLI for Anysite API - web data extraction for humans and AI agents
 Project-URL: Homepage, https://anysite.io
 Project-URL: Documentation, https://docs.anysite.io/cli
@@ -259,6 +259,8 @@ sources:
       path: ./output/companies-{{date}}.csv
       format: csv
     db_load:
+      key: _input_value   # Unique key for incremental sync
+      sync: full          # full (default) or append (no DELETE)
       fields: [name, url, employee_count]

   - id: employees
@@ -274,6 +276,8 @@ sources:
       count: 5
     refresh: always  # Re-collect every run with --incremental
     db_load:
+      key: urn.value   # Unique key for incremental sync
+      sync: append     # Keep old records (no DELETE on diff)
       fields: [name, url, headline]

 storage:
@@ -318,9 +322,15 @@ anysite dataset query dataset.yaml --interactive
 anysite dataset stats dataset.yaml --source companies
 anysite dataset profile dataset.yaml

-# Load into PostgreSQL with automatic FK linking
+# Load into PostgreSQL with automatic FK linking (incremental sync with db_load.key)
+anysite dataset load-db dataset.yaml -c pg
+
+# Drop and reload from latest snapshot
 anysite dataset load-db dataset.yaml -c pg --drop-existing

+# Load a specific snapshot date
+anysite dataset load-db dataset.yaml -c pg --snapshot 2026-01-15
+
 # Run history and logs
 anysite dataset history my-dataset
 anysite dataset logs my-dataset --run 42
@@ -328,8 +338,9 @@ anysite dataset logs my-dataset --run 42
 # Generate cron/systemd schedule
 anysite dataset schedule dataset.yaml --incremental --load-db pg

-# Compare snapshots (diff two collection dates)
+# Compare snapshots (diff two collection dates, supports dot-notation keys)
 anysite dataset diff dataset.yaml --source employees --key _input_value
+anysite dataset diff dataset.yaml --source profiles --key urn.value --fields "name,headline"

 # Reset incremental state
 anysite dataset reset-cursor dataset.yaml
{anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/RECORD
CHANGED

@@ -19,17 +19,17 @@ anysite/config/paths.py,sha256=EmHJD8wlf4Q9IUn8Gp1JQ8Z3ffrIYAt5iHRyImQOf5I,1087
 anysite/config/settings.py,sha256=Hc0j_aCCtkJeL4nHw-EFyfJ8WEDk57G08iNUFquUhpM,5235
 anysite/dataset/__init__.py,sha256=J0sKQkGwVOPtvp6pka7LcdeUEADvjWRs71yRuROzJxI,847
 anysite/dataset/analyzer.py,sha256=8dsPW32SbSaUTy1F0NIed1U45wjiMgQeJ2iWX7hBxRQ,9245
-anysite/dataset/cli.py,sha256=
-anysite/dataset/collector.py,sha256=
-anysite/dataset/db_loader.py,sha256=
-anysite/dataset/differ.py,sha256=
+anysite/dataset/cli.py,sha256=rEWK1ka-YQ_Vbbj2nMaMYTD9g3wa3ethUWSoaWRSGTY,23066
+anysite/dataset/collector.py,sha256=ZdR3CmQQew_iuJpNtJ4knSrjt0hvkEL4WIaS0IKEkwQ,23927
+anysite/dataset/db_loader.py,sha256=ksvRt-VJISL4Syk2O1-TTkOMj1uGzk7aQARYS2n--U4,13751
+anysite/dataset/differ.py,sha256=jB_VWTb7UuEBWG9nv1ry5xeo9hmWdhA_cTm6Ed43_Uw,17746
 anysite/dataset/errors.py,sha256=r8cZXoIzSeTGCWpeYjntnN0AduCu74YZyWs3sFu17J4,914
 anysite/dataset/exporters.py,sha256=mA2FYbYJbHfrwkXbHDu4g5qPG_JJKnkVciXFKPkF1Vw,3708
 anysite/dataset/history.py,sha256=avFs0ADlM7Hr-ttqC1FfjJiQxvQP20sScM7ZoY4lvU0,5471
-anysite/dataset/models.py,sha256=
+anysite/dataset/models.py,sha256=d-bkgu2dUY7_VSgH-oVh84IV3X-KpxRfja0H5WnhauU,9998
 anysite/dataset/notifications.py,sha256=ORzo9XOgOxzLb7rk4pevlKPB_Taf-jejlrtmO4Zgl2c,2367
 anysite/dataset/scheduler.py,sha256=zpbA5tRUQZXr-9lZnG58dvE3E7ZBlAd-U-PTXExe9f0,3339
-anysite/dataset/storage.py,sha256=
+anysite/dataset/storage.py,sha256=ySY822m4lQd6Ip0i3VNPVbHEO6U6zBBwHi-56AXOaXE,5974
 anysite/dataset/transformer.py,sha256=XBI4MiZ_F_IZdootV0GAePaM9-pUadIte7RABbjBipc,6843
 anysite/db/__init__.py,sha256=xGGZHlMt5FUZjI6MAmf2VfyNLypOeXwrRL-gmuTsyl4,1117
 anysite/db/cli.py,sha256=fYuIKWq7eF5mAfZWnXNbtlpITnbYbOFMm2TqU54xIl4,22118
@@ -58,8 +58,8 @@ anysite/streaming/writer.py,sha256=HfMsC4umUdJuNIAPK57YAxEGyTwUmy-zNrqFkwY6aew,4
 anysite/utils/__init__.py,sha256=7SnbxpxKENK-2ecUL5NfnZ9okGI7COKYw4WF46172HM,23
 anysite/utils/fields.py,sha256=bSrHadzNmabL4qubqhXXZoWb_P8KA-3S7_FLVT8nGBc,7410
 anysite/utils/retry.py,sha256=89TbXvavi5t22P2mTYCLAS6SSZoW65gQ0nnYNbYAF0M,2684
-anysite_cli-0.1.
-anysite_cli-0.1.
-anysite_cli-0.1.
-anysite_cli-0.1.
-anysite_cli-0.1.
+anysite_cli-0.1.5.dist-info/METADATA,sha256=B4HxyrTZxBbhMb17lb0LoRcne_cRehz8xNUYIvDraMA,12437
+anysite_cli-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+anysite_cli-0.1.5.dist-info/entry_points.txt,sha256=FDPxNasy0fRRcOgJdZRVP7Qw01C3TwRa1OwPJiskNyg,45
+anysite_cli-0.1.5.dist-info/licenses/LICENSE,sha256=gVAxkI23CFm4x4HV_fkQYw_bGq93mQmVZEwxNs-YTa4,1069
+anysite_cli-0.1.5.dist-info/RECORD,,
{anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/WHEEL
File without changes

{anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/entry_points.txt
File without changes

{anysite_cli-0.1.3.dist-info → anysite_cli-0.1.5.dist-info}/licenses/LICENSE
File without changes