anysite-cli 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/PKG-INFO +1 -1
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/pyproject.toml +1 -1
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/db_loader.py +58 -7
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_db_loader.py +223 -1
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/.claude/settings.local.json +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/.gitignore +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/CLAUDE.md +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/LICENSE +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/README.md +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/SKILL.md +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/references/api-reference.md +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/references/dataset-guide.md +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/__main__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/client.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/errors.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/schemas.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/executor.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/input.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/rate_limiter.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/config.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/executor.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/options.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/paths.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/settings.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/analyzer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/cli.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/collector.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/differ.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/errors.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/exporters.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/history.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/models.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/notifications.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/scheduler.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/storage.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/transformer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/base.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/postgres.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/sqlite.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/cli.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/config.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/manager.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/insert.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/query.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/inference.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/types.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/utils/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/utils/sanitize.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/main.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/models/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/console.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/formatters.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/templates.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/py.typed +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/progress.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/writer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/fields.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/retry.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/enriched_partners_sample_10.csv +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/linkedin-partners/company_aliases.txt +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/linkedin-partners/dataset.yaml +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-deep/dataset.yaml +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-intel/dataset.yaml +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-linkedin/company_aliases.txt +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-linkedin/dataset.yaml +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-pipeline/dataset.yaml +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/conftest.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_api/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_executor.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_input.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_rate_limiter.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_cli/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_cli/test_main.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_analyzer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_collector.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_differ.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_exporters.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_history.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_integration_csv.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_models.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_notifications.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_scheduler.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_storage.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_transformer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_cli.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_config.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_inference.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_insert.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_manager.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_postgres_adapter.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_sanitize.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_sqlite_adapter.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/test_formatters.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/test_templates.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/test_progress.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/test_writer.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/__init__.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/test_fields.py +0 -0
- {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/test_retry.py +0 -0
|
@@ -315,8 +315,14 @@ class DatasetDbLoader:
|
|
|
315
315
|
return _extract_dot_value(record, diff_key)
|
|
316
316
|
return record.get(diff_key)
|
|
317
317
|
|
|
318
|
+
# Build field mapping for db_load.fields filtering
|
|
319
|
+
field_mapping = self._get_db_field_mapping(source)
|
|
320
|
+
|
|
318
321
|
# Determine the DB column name for the key
|
|
319
|
-
|
|
322
|
+
if field_mapping and diff_key in field_mapping:
|
|
323
|
+
db_key_col = field_mapping[diff_key]
|
|
324
|
+
else:
|
|
325
|
+
db_key_col = diff_key.replace(".", "_")
|
|
320
326
|
|
|
321
327
|
# INSERT added records
|
|
322
328
|
if result.added:
|
|
@@ -326,13 +332,14 @@ class DatasetDbLoader:
|
|
|
326
332
|
total += 1
|
|
327
333
|
|
|
328
334
|
# DELETE removed records (skipped in append mode)
|
|
335
|
+
ph = self._placeholder()
|
|
329
336
|
if result.removed and sync_mode == "full":
|
|
330
337
|
safe_col = sanitize_identifier(db_key_col)
|
|
331
338
|
for record in result.removed:
|
|
332
339
|
key_val = _get_key_val(record)
|
|
333
340
|
if key_val is not None:
|
|
334
341
|
self.adapter.execute(
|
|
335
|
-
f"DELETE FROM {table_name} WHERE {safe_col} = ?",
|
|
342
|
+
f"DELETE FROM {table_name} WHERE {safe_col} = {ph}",
|
|
336
343
|
(str(key_val),),
|
|
337
344
|
)
|
|
338
345
|
total += 1
|
|
@@ -348,20 +355,34 @@ class DatasetDbLoader:
|
|
|
348
355
|
if not changed_fields:
|
|
349
356
|
continue
|
|
350
357
|
|
|
351
|
-
# Build SET clause
|
|
358
|
+
# Build SET clause — only include fields that exist in the DB
|
|
352
359
|
set_parts = []
|
|
353
360
|
params: list[Any] = []
|
|
354
361
|
for field_name in changed_fields:
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
362
|
+
if field_mapping is not None:
|
|
363
|
+
if field_name not in field_mapping:
|
|
364
|
+
continue
|
|
365
|
+
db_col = field_mapping[field_name]
|
|
366
|
+
else:
|
|
367
|
+
db_col = field_name
|
|
368
|
+
|
|
369
|
+
if "." in field_name:
|
|
370
|
+
new_val = _extract_dot_value(record, field_name)
|
|
371
|
+
else:
|
|
372
|
+
new_val = record.get(field_name)
|
|
373
|
+
|
|
374
|
+
safe_field = sanitize_identifier(db_col)
|
|
375
|
+
set_parts.append(f"{safe_field} = {ph}")
|
|
358
376
|
params.append(new_val)
|
|
359
377
|
|
|
378
|
+
if not set_parts:
|
|
379
|
+
continue
|
|
380
|
+
|
|
360
381
|
params.append(str(key_val))
|
|
361
382
|
sql = (
|
|
362
383
|
f"UPDATE {table_name} "
|
|
363
384
|
f"SET {', '.join(set_parts)} "
|
|
364
|
-
f"WHERE {safe_col} = ?"
|
|
385
|
+
f"WHERE {safe_col} = {ph}"
|
|
365
386
|
)
|
|
366
387
|
self.adapter.execute(sql, tuple(params))
|
|
367
388
|
total += 1
|
|
@@ -375,6 +396,36 @@ class DatasetDbLoader:
|
|
|
375
396
|
return other.dependency.field
|
|
376
397
|
return None
|
|
377
398
|
|
|
399
|
+
def _get_db_field_mapping(self, source: DatasetSource) -> dict[str, str] | None:
|
|
400
|
+
"""Build mapping of parquet_field -> db_column from db_load.fields.
|
|
401
|
+
|
|
402
|
+
Returns None if no explicit fields configured (all fields allowed).
|
|
403
|
+
"""
|
|
404
|
+
db_load = source.db_load
|
|
405
|
+
if not db_load or not db_load.fields:
|
|
406
|
+
return None
|
|
407
|
+
|
|
408
|
+
mapping: dict[str, str] = {}
|
|
409
|
+
for field_spec in db_load.fields:
|
|
410
|
+
alias = None
|
|
411
|
+
upper = field_spec.upper()
|
|
412
|
+
as_idx = upper.find(" AS ")
|
|
413
|
+
if as_idx != -1:
|
|
414
|
+
alias = field_spec[as_idx + 4:].strip()
|
|
415
|
+
source_field = field_spec[:as_idx].strip()
|
|
416
|
+
else:
|
|
417
|
+
source_field = field_spec
|
|
418
|
+
|
|
419
|
+
col_name = alias or source_field.replace(".", "_")
|
|
420
|
+
mapping[source_field] = col_name
|
|
421
|
+
return mapping
|
|
422
|
+
|
|
423
|
+
def _placeholder(self) -> str:
|
|
424
|
+
"""Get the parameter placeholder for the dialect."""
|
|
425
|
+
if self._dialect == "postgres":
|
|
426
|
+
return "%s"
|
|
427
|
+
return "?"
|
|
428
|
+
|
|
378
429
|
def _auto_id_type(self) -> str:
|
|
379
430
|
"""Get the auto-increment ID column type for the dialect."""
|
|
380
431
|
if self._dialect == "postgres":
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Tests for dataset DB loader with SQLite in-memory adapter."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
|
|
5
4
|
import pytest
|
|
6
5
|
|
|
7
6
|
from anysite.dataset.db_loader import DatasetDbLoader, _extract_dot_value, _filter_record
|
|
@@ -736,3 +735,226 @@ class TestAppendSyncMode:
|
|
|
736
735
|
rows = adapter.fetch_all("SELECT * FROM posts")
|
|
737
736
|
assert len(rows) == 1
|
|
738
737
|
assert rows[0]["uid"] == "a"
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
class TestPostgresPlaceholders:
|
|
741
|
+
"""Test that diff-based sync uses %s placeholders for postgres dialect."""
|
|
742
|
+
|
|
743
|
+
def _setup_two_snapshots(self, tmp_path, source_id, old_records, new_records):
|
|
744
|
+
source_dir = get_source_dir(tmp_path / "data", source_id)
|
|
745
|
+
write_parquet(old_records, source_dir / "2026-01-01.parquet")
|
|
746
|
+
write_parquet(new_records, source_dir / "2026-01-02.parquet")
|
|
747
|
+
|
|
748
|
+
def test_delete_uses_percent_s(self, tmp_path):
|
|
749
|
+
"""DELETE query uses %s placeholder for postgres."""
|
|
750
|
+
sources = [
|
|
751
|
+
DatasetSource(
|
|
752
|
+
id="items", endpoint="/api/items",
|
|
753
|
+
db_load=DbLoadConfig(key="name", sync="full"),
|
|
754
|
+
),
|
|
755
|
+
]
|
|
756
|
+
config = _make_config(tmp_path, sources)
|
|
757
|
+
|
|
758
|
+
self._setup_two_snapshots(
|
|
759
|
+
tmp_path, "items",
|
|
760
|
+
old_records=[
|
|
761
|
+
{"name": "Alice", "score": 90},
|
|
762
|
+
{"name": "Bob", "score": 80},
|
|
763
|
+
],
|
|
764
|
+
new_records=[{"name": "Alice", "score": 90}],
|
|
765
|
+
)
|
|
766
|
+
|
|
767
|
+
# Use real SQLite adapter for initial load, then mock for diff sync
|
|
768
|
+
adapter = _sqlite_adapter()
|
|
769
|
+
with adapter:
|
|
770
|
+
source_dir = get_source_dir(tmp_path / "data", "items")
|
|
771
|
+
loader = DatasetDbLoader(config, adapter)
|
|
772
|
+
loader._full_insert(
|
|
773
|
+
sources[0], "items", source_dir / "2026-01-01.parquet"
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
# Patch dialect to postgres and spy on execute
|
|
777
|
+
loader2 = DatasetDbLoader(config, adapter)
|
|
778
|
+
loader2._dialect = "postgres"
|
|
779
|
+
original_execute = adapter.execute
|
|
780
|
+
calls = []
|
|
781
|
+
|
|
782
|
+
def spy_execute(sql, params=None):
|
|
783
|
+
calls.append((sql, params))
|
|
784
|
+
# Replace %s with ? for SQLite execution
|
|
785
|
+
original_execute(sql.replace("%s", "?"), params)
|
|
786
|
+
|
|
787
|
+
adapter.execute = spy_execute
|
|
788
|
+
loader2.load_all()
|
|
789
|
+
|
|
790
|
+
# Verify DELETE used %s
|
|
791
|
+
delete_calls = [c for c in calls if "DELETE" in c[0]]
|
|
792
|
+
assert len(delete_calls) == 1
|
|
793
|
+
assert "%s" in delete_calls[0][0]
|
|
794
|
+
assert "?" not in delete_calls[0][0]
|
|
795
|
+
|
|
796
|
+
def test_update_uses_percent_s(self, tmp_path):
|
|
797
|
+
"""UPDATE query uses %s placeholders for postgres."""
|
|
798
|
+
sources = [
|
|
799
|
+
DatasetSource(
|
|
800
|
+
id="items", endpoint="/api/items",
|
|
801
|
+
db_load=DbLoadConfig(key="name"),
|
|
802
|
+
),
|
|
803
|
+
]
|
|
804
|
+
config = _make_config(tmp_path, sources)
|
|
805
|
+
|
|
806
|
+
self._setup_two_snapshots(
|
|
807
|
+
tmp_path, "items",
|
|
808
|
+
old_records=[{"name": "Alice", "score": 90}],
|
|
809
|
+
new_records=[{"name": "Alice", "score": 95}],
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
adapter = _sqlite_adapter()
|
|
813
|
+
with adapter:
|
|
814
|
+
source_dir = get_source_dir(tmp_path / "data", "items")
|
|
815
|
+
loader = DatasetDbLoader(config, adapter)
|
|
816
|
+
loader._full_insert(
|
|
817
|
+
sources[0], "items", source_dir / "2026-01-01.parquet"
|
|
818
|
+
)
|
|
819
|
+
|
|
820
|
+
loader2 = DatasetDbLoader(config, adapter)
|
|
821
|
+
loader2._dialect = "postgres"
|
|
822
|
+
original_execute = adapter.execute
|
|
823
|
+
calls = []
|
|
824
|
+
|
|
825
|
+
def spy_execute(sql, params=None):
|
|
826
|
+
calls.append((sql, params))
|
|
827
|
+
original_execute(sql.replace("%s", "?"), params)
|
|
828
|
+
|
|
829
|
+
adapter.execute = spy_execute
|
|
830
|
+
loader2.load_all()
|
|
831
|
+
|
|
832
|
+
update_calls = [c for c in calls if "UPDATE" in c[0]]
|
|
833
|
+
assert len(update_calls) == 1
|
|
834
|
+
assert "%s" in update_calls[0][0]
|
|
835
|
+
assert "?" not in update_calls[0][0]
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
class TestUpdateFieldFiltering:
|
|
839
|
+
"""Test that UPDATE only targets fields present in db_load.fields."""
|
|
840
|
+
|
|
841
|
+
def _setup_two_snapshots(self, tmp_path, source_id, old_records, new_records):
|
|
842
|
+
source_dir = get_source_dir(tmp_path / "data", source_id)
|
|
843
|
+
write_parquet(old_records, source_dir / "2026-01-01.parquet")
|
|
844
|
+
write_parquet(new_records, source_dir / "2026-01-02.parquet")
|
|
845
|
+
|
|
846
|
+
def test_update_only_db_load_fields(self, tmp_path):
|
|
847
|
+
"""UPDATE should skip fields not in db_load.fields."""
|
|
848
|
+
sources = [
|
|
849
|
+
DatasetSource(
|
|
850
|
+
id="items", endpoint="/api/items",
|
|
851
|
+
db_load=DbLoadConfig(key="name", fields=["name", "score"]),
|
|
852
|
+
),
|
|
853
|
+
]
|
|
854
|
+
config = _make_config(tmp_path, sources)
|
|
855
|
+
|
|
856
|
+
self._setup_two_snapshots(
|
|
857
|
+
tmp_path, "items",
|
|
858
|
+
old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
|
|
859
|
+
new_records=[{"name": "Alice", "score": 95, "extra": "new"}],
|
|
860
|
+
)
|
|
861
|
+
|
|
862
|
+
adapter = _sqlite_adapter()
|
|
863
|
+
with adapter:
|
|
864
|
+
# Full insert only creates columns from db_load.fields
|
|
865
|
+
source_dir = get_source_dir(tmp_path / "data", "items")
|
|
866
|
+
loader = DatasetDbLoader(config, adapter)
|
|
867
|
+
loader._full_insert(
|
|
868
|
+
sources[0], "items", source_dir / "2026-01-01.parquet"
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
# Table should only have id, name, score (no extra)
|
|
872
|
+
schema = adapter.get_table_schema("items")
|
|
873
|
+
col_names = [c["name"] for c in schema]
|
|
874
|
+
assert "extra" not in col_names
|
|
875
|
+
assert "score" in col_names
|
|
876
|
+
|
|
877
|
+
# Diff sync — extra changed but should be skipped
|
|
878
|
+
loader2 = DatasetDbLoader(config, adapter)
|
|
879
|
+
results = loader2.load_all()
|
|
880
|
+
assert results["items"] == 1 # score changed
|
|
881
|
+
|
|
882
|
+
rows = adapter.fetch_all("SELECT * FROM items")
|
|
883
|
+
assert rows[0]["score"] == 95
|
|
884
|
+
|
|
885
|
+
def test_update_with_dot_notation_alias(self, tmp_path):
|
|
886
|
+
"""UPDATE uses correct DB column name for aliased dot-notation fields."""
|
|
887
|
+
sources = [
|
|
888
|
+
DatasetSource(
|
|
889
|
+
id="items", endpoint="/api/items",
|
|
890
|
+
db_load=DbLoadConfig(
|
|
891
|
+
key="meta.id",
|
|
892
|
+
fields=["meta.id AS meta_id", "text", "count"],
|
|
893
|
+
),
|
|
894
|
+
),
|
|
895
|
+
]
|
|
896
|
+
config = _make_config(tmp_path, sources)
|
|
897
|
+
|
|
898
|
+
self._setup_two_snapshots(
|
|
899
|
+
tmp_path, "items",
|
|
900
|
+
old_records=[
|
|
901
|
+
{"meta": json.dumps({"id": "x1"}), "text": "hello", "count": 5, "other": "a"},
|
|
902
|
+
],
|
|
903
|
+
new_records=[
|
|
904
|
+
{"meta": json.dumps({"id": "x1"}), "text": "updated", "count": 10, "other": "b"},
|
|
905
|
+
],
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
adapter = _sqlite_adapter()
|
|
909
|
+
with adapter:
|
|
910
|
+
source_dir = get_source_dir(tmp_path / "data", "items")
|
|
911
|
+
loader = DatasetDbLoader(config, adapter)
|
|
912
|
+
loader._full_insert(
|
|
913
|
+
sources[0], "items", source_dir / "2026-01-01.parquet"
|
|
914
|
+
)
|
|
915
|
+
|
|
916
|
+
rows = adapter.fetch_all("SELECT * FROM items")
|
|
917
|
+
assert rows[0]["meta_id"] == "x1"
|
|
918
|
+
assert rows[0]["text"] == "hello"
|
|
919
|
+
|
|
920
|
+
# Diff sync — text and count changed, other should be skipped
|
|
921
|
+
loader2 = DatasetDbLoader(config, adapter)
|
|
922
|
+
results = loader2.load_all()
|
|
923
|
+
|
|
924
|
+
rows = adapter.fetch_all("SELECT * FROM items")
|
|
925
|
+
assert rows[0]["text"] == "updated"
|
|
926
|
+
assert rows[0]["count"] == 10
|
|
927
|
+
|
|
928
|
+
def test_update_skipped_when_no_db_fields_changed(self, tmp_path):
|
|
929
|
+
"""If only non-DB fields changed, no UPDATE should happen."""
|
|
930
|
+
sources = [
|
|
931
|
+
DatasetSource(
|
|
932
|
+
id="items", endpoint="/api/items",
|
|
933
|
+
db_load=DbLoadConfig(key="name", fields=["name", "score"]),
|
|
934
|
+
),
|
|
935
|
+
]
|
|
936
|
+
config = _make_config(tmp_path, sources)
|
|
937
|
+
|
|
938
|
+
self._setup_two_snapshots(
|
|
939
|
+
tmp_path, "items",
|
|
940
|
+
old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
|
|
941
|
+
# score unchanged, only extra changed
|
|
942
|
+
new_records=[{"name": "Alice", "score": 90, "extra": "new"}],
|
|
943
|
+
)
|
|
944
|
+
|
|
945
|
+
adapter = _sqlite_adapter()
|
|
946
|
+
with adapter:
|
|
947
|
+
source_dir = get_source_dir(tmp_path / "data", "items")
|
|
948
|
+
loader = DatasetDbLoader(config, adapter)
|
|
949
|
+
loader._full_insert(
|
|
950
|
+
sources[0], "items", source_dir / "2026-01-01.parquet"
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
loader2 = DatasetDbLoader(config, adapter)
|
|
954
|
+
results = loader2.load_all()
|
|
955
|
+
# extra is not in db_load.fields, so no actual update
|
|
956
|
+
assert results["items"] == 0
|
|
957
|
+
|
|
958
|
+
rows = adapter.fetch_all("SELECT * FROM items")
|
|
959
|
+
assert len(rows) == 1
|
|
960
|
+
assert rows[0]["score"] == 90
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|