anysite-cli 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/PKG-INFO +1 -1
  2. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/pyproject.toml +1 -1
  3. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/db_loader.py +58 -7
  4. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_db_loader.py +223 -1
  5. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/.claude/settings.local.json +0 -0
  6. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/.gitignore +0 -0
  7. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/CLAUDE.md +0 -0
  8. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/LICENSE +0 -0
  9. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/README.md +0 -0
  10. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/SKILL.md +0 -0
  11. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/references/api-reference.md +0 -0
  12. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/skills/anysite-cli/references/dataset-guide.md +0 -0
  13. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/__init__.py +0 -0
  14. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/__main__.py +0 -0
  15. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/__init__.py +0 -0
  16. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/client.py +0 -0
  17. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/errors.py +0 -0
  18. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/api/schemas.py +0 -0
  19. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/__init__.py +0 -0
  20. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/executor.py +0 -0
  21. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/input.py +0 -0
  22. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/batch/rate_limiter.py +0 -0
  23. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/__init__.py +0 -0
  24. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/config.py +0 -0
  25. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/executor.py +0 -0
  26. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/cli/options.py +0 -0
  27. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/__init__.py +0 -0
  28. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/paths.py +0 -0
  29. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/config/settings.py +0 -0
  30. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/__init__.py +0 -0
  31. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/analyzer.py +0 -0
  32. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/cli.py +0 -0
  33. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/collector.py +0 -0
  34. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/differ.py +0 -0
  35. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/errors.py +0 -0
  36. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/exporters.py +0 -0
  37. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/history.py +0 -0
  38. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/models.py +0 -0
  39. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/notifications.py +0 -0
  40. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/scheduler.py +0 -0
  41. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/storage.py +0 -0
  42. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/dataset/transformer.py +0 -0
  43. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/__init__.py +0 -0
  44. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/__init__.py +0 -0
  45. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/base.py +0 -0
  46. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/postgres.py +0 -0
  47. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/adapters/sqlite.py +0 -0
  48. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/cli.py +0 -0
  49. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/config.py +0 -0
  50. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/manager.py +0 -0
  51. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/__init__.py +0 -0
  52. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/insert.py +0 -0
  53. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/operations/query.py +0 -0
  54. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/__init__.py +0 -0
  55. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/inference.py +0 -0
  56. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/schema/types.py +0 -0
  57. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/utils/__init__.py +0 -0
  58. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/db/utils/sanitize.py +0 -0
  59. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/main.py +0 -0
  60. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/models/__init__.py +0 -0
  61. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/__init__.py +0 -0
  62. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/console.py +0 -0
  63. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/formatters.py +0 -0
  64. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/output/templates.py +0 -0
  65. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/py.typed +0 -0
  66. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/__init__.py +0 -0
  67. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/progress.py +0 -0
  68. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/streaming/writer.py +0 -0
  69. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/__init__.py +0 -0
  70. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/fields.py +0 -0
  71. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/src/anysite/utils/retry.py +0 -0
  72. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/enriched_partners_sample_10.csv +0 -0
  73. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/linkedin-partners/company_aliases.txt +0 -0
  74. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/linkedin-partners/dataset.yaml +0 -0
  75. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-deep/dataset.yaml +0 -0
  76. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-intel/dataset.yaml +0 -0
  77. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-linkedin/company_aliases.txt +0 -0
  78. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-linkedin/dataset.yaml +0 -0
  79. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/test_data/partners-pipeline/dataset.yaml +0 -0
  80. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/__init__.py +0 -0
  81. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/conftest.py +0 -0
  82. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_api/__init__.py +0 -0
  83. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/__init__.py +0 -0
  84. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_executor.py +0 -0
  85. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_input.py +0 -0
  86. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_batch/test_rate_limiter.py +0 -0
  87. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_cli/__init__.py +0 -0
  88. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_cli/test_main.py +0 -0
  89. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/__init__.py +0 -0
  90. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_analyzer.py +0 -0
  91. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_collector.py +0 -0
  92. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_differ.py +0 -0
  93. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_exporters.py +0 -0
  94. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_history.py +0 -0
  95. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_integration_csv.py +0 -0
  96. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_models.py +0 -0
  97. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_notifications.py +0 -0
  98. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_scheduler.py +0 -0
  99. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_storage.py +0 -0
  100. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_dataset/test_transformer.py +0 -0
  101. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/__init__.py +0 -0
  102. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_cli.py +0 -0
  103. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_config.py +0 -0
  104. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_inference.py +0 -0
  105. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_insert.py +0 -0
  106. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_manager.py +0 -0
  107. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_postgres_adapter.py +0 -0
  108. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_sanitize.py +0 -0
  109. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_db/test_sqlite_adapter.py +0 -0
  110. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/__init__.py +0 -0
  111. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/test_formatters.py +0 -0
  112. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_output/test_templates.py +0 -0
  113. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/__init__.py +0 -0
  114. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/test_progress.py +0 -0
  115. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_streaming/test_writer.py +0 -0
  116. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/__init__.py +0 -0
  117. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/test_fields.py +0 -0
  118. {anysite_cli-0.1.5 → anysite_cli-0.1.7}/tests/test_utils/test_retry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anysite-cli
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: CLI for Anysite API - web data extraction for humans and AI agents
5
5
  Project-URL: Homepage, https://anysite.io
6
6
  Project-URL: Documentation, https://docs.anysite.io/cli
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "anysite-cli"
7
- version = "0.1.5"
7
+ version = "0.1.7"
8
8
  description = "CLI for Anysite API - web data extraction for humans and AI agents"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -315,8 +315,14 @@ class DatasetDbLoader:
315
315
  return _extract_dot_value(record, diff_key)
316
316
  return record.get(diff_key)
317
317
 
318
+ # Build field mapping for db_load.fields filtering
319
+ field_mapping = self._get_db_field_mapping(source)
320
+
318
321
  # Determine the DB column name for the key
319
- db_key_col = diff_key.replace(".", "_")
322
+ if field_mapping and diff_key in field_mapping:
323
+ db_key_col = field_mapping[diff_key]
324
+ else:
325
+ db_key_col = diff_key.replace(".", "_")
320
326
 
321
327
  # INSERT added records
322
328
  if result.added:
@@ -326,13 +332,14 @@ class DatasetDbLoader:
326
332
  total += 1
327
333
 
328
334
  # DELETE removed records (skipped in append mode)
335
+ ph = self._placeholder()
329
336
  if result.removed and sync_mode == "full":
330
337
  safe_col = sanitize_identifier(db_key_col)
331
338
  for record in result.removed:
332
339
  key_val = _get_key_val(record)
333
340
  if key_val is not None:
334
341
  self.adapter.execute(
335
- f"DELETE FROM {table_name} WHERE {safe_col} = ?",
342
+ f"DELETE FROM {table_name} WHERE {safe_col} = {ph}",
336
343
  (str(key_val),),
337
344
  )
338
345
  total += 1
@@ -348,20 +355,34 @@ class DatasetDbLoader:
348
355
  if not changed_fields:
349
356
  continue
350
357
 
351
- # Build SET clause from changed fields
358
+ # Build SET clause only include fields that exist in the DB
352
359
  set_parts = []
353
360
  params: list[Any] = []
354
361
  for field_name in changed_fields:
355
- new_val = record.get(field_name)
356
- safe_field = sanitize_identifier(field_name)
357
- set_parts.append(f"{safe_field} = ?")
362
+ if field_mapping is not None:
363
+ if field_name not in field_mapping:
364
+ continue
365
+ db_col = field_mapping[field_name]
366
+ else:
367
+ db_col = field_name
368
+
369
+ if "." in field_name:
370
+ new_val = _extract_dot_value(record, field_name)
371
+ else:
372
+ new_val = record.get(field_name)
373
+
374
+ safe_field = sanitize_identifier(db_col)
375
+ set_parts.append(f"{safe_field} = {ph}")
358
376
  params.append(new_val)
359
377
 
378
+ if not set_parts:
379
+ continue
380
+
360
381
  params.append(str(key_val))
361
382
  sql = (
362
383
  f"UPDATE {table_name} "
363
384
  f"SET {', '.join(set_parts)} "
364
- f"WHERE {safe_col} = ?"
385
+ f"WHERE {safe_col} = {ph}"
365
386
  )
366
387
  self.adapter.execute(sql, tuple(params))
367
388
  total += 1
@@ -375,6 +396,36 @@ class DatasetDbLoader:
375
396
  return other.dependency.field
376
397
  return None
377
398
 
399
+ def _get_db_field_mapping(self, source: DatasetSource) -> dict[str, str] | None:
400
+ """Build mapping of parquet_field -> db_column from db_load.fields.
401
+
402
+ Returns None if no explicit fields configured (all fields allowed).
403
+ """
404
+ db_load = source.db_load
405
+ if not db_load or not db_load.fields:
406
+ return None
407
+
408
+ mapping: dict[str, str] = {}
409
+ for field_spec in db_load.fields:
410
+ alias = None
411
+ upper = field_spec.upper()
412
+ as_idx = upper.find(" AS ")
413
+ if as_idx != -1:
414
+ alias = field_spec[as_idx + 4:].strip()
415
+ source_field = field_spec[:as_idx].strip()
416
+ else:
417
+ source_field = field_spec
418
+
419
+ col_name = alias or source_field.replace(".", "_")
420
+ mapping[source_field] = col_name
421
+ return mapping
422
+
423
+ def _placeholder(self) -> str:
424
+ """Get the parameter placeholder for the dialect."""
425
+ if self._dialect == "postgres":
426
+ return "%s"
427
+ return "?"
428
+
378
429
  def _auto_id_type(self) -> str:
379
430
  """Get the auto-increment ID column type for the dialect."""
380
431
  if self._dialect == "postgres":
@@ -1,7 +1,6 @@
1
1
  """Tests for dataset DB loader with SQLite in-memory adapter."""
2
2
 
3
3
  import json
4
-
5
4
  import pytest
6
5
 
7
6
  from anysite.dataset.db_loader import DatasetDbLoader, _extract_dot_value, _filter_record
@@ -736,3 +735,226 @@ class TestAppendSyncMode:
736
735
  rows = adapter.fetch_all("SELECT * FROM posts")
737
736
  assert len(rows) == 1
738
737
  assert rows[0]["uid"] == "a"
738
+
739
+
740
+ class TestPostgresPlaceholders:
741
+ """Test that diff-based sync uses %s placeholders for postgres dialect."""
742
+
743
+ def _setup_two_snapshots(self, tmp_path, source_id, old_records, new_records):
744
+ source_dir = get_source_dir(tmp_path / "data", source_id)
745
+ write_parquet(old_records, source_dir / "2026-01-01.parquet")
746
+ write_parquet(new_records, source_dir / "2026-01-02.parquet")
747
+
748
+ def test_delete_uses_percent_s(self, tmp_path):
749
+ """DELETE query uses %s placeholder for postgres."""
750
+ sources = [
751
+ DatasetSource(
752
+ id="items", endpoint="/api/items",
753
+ db_load=DbLoadConfig(key="name", sync="full"),
754
+ ),
755
+ ]
756
+ config = _make_config(tmp_path, sources)
757
+
758
+ self._setup_two_snapshots(
759
+ tmp_path, "items",
760
+ old_records=[
761
+ {"name": "Alice", "score": 90},
762
+ {"name": "Bob", "score": 80},
763
+ ],
764
+ new_records=[{"name": "Alice", "score": 90}],
765
+ )
766
+
767
+ # Use real SQLite adapter for initial load, then mock for diff sync
768
+ adapter = _sqlite_adapter()
769
+ with adapter:
770
+ source_dir = get_source_dir(tmp_path / "data", "items")
771
+ loader = DatasetDbLoader(config, adapter)
772
+ loader._full_insert(
773
+ sources[0], "items", source_dir / "2026-01-01.parquet"
774
+ )
775
+
776
+ # Patch dialect to postgres and spy on execute
777
+ loader2 = DatasetDbLoader(config, adapter)
778
+ loader2._dialect = "postgres"
779
+ original_execute = adapter.execute
780
+ calls = []
781
+
782
+ def spy_execute(sql, params=None):
783
+ calls.append((sql, params))
784
+ # Replace %s with ? for SQLite execution
785
+ original_execute(sql.replace("%s", "?"), params)
786
+
787
+ adapter.execute = spy_execute
788
+ loader2.load_all()
789
+
790
+ # Verify DELETE used %s
791
+ delete_calls = [c for c in calls if "DELETE" in c[0]]
792
+ assert len(delete_calls) == 1
793
+ assert "%s" in delete_calls[0][0]
794
+ assert "?" not in delete_calls[0][0]
795
+
796
+ def test_update_uses_percent_s(self, tmp_path):
797
+ """UPDATE query uses %s placeholders for postgres."""
798
+ sources = [
799
+ DatasetSource(
800
+ id="items", endpoint="/api/items",
801
+ db_load=DbLoadConfig(key="name"),
802
+ ),
803
+ ]
804
+ config = _make_config(tmp_path, sources)
805
+
806
+ self._setup_two_snapshots(
807
+ tmp_path, "items",
808
+ old_records=[{"name": "Alice", "score": 90}],
809
+ new_records=[{"name": "Alice", "score": 95}],
810
+ )
811
+
812
+ adapter = _sqlite_adapter()
813
+ with adapter:
814
+ source_dir = get_source_dir(tmp_path / "data", "items")
815
+ loader = DatasetDbLoader(config, adapter)
816
+ loader._full_insert(
817
+ sources[0], "items", source_dir / "2026-01-01.parquet"
818
+ )
819
+
820
+ loader2 = DatasetDbLoader(config, adapter)
821
+ loader2._dialect = "postgres"
822
+ original_execute = adapter.execute
823
+ calls = []
824
+
825
+ def spy_execute(sql, params=None):
826
+ calls.append((sql, params))
827
+ original_execute(sql.replace("%s", "?"), params)
828
+
829
+ adapter.execute = spy_execute
830
+ loader2.load_all()
831
+
832
+ update_calls = [c for c in calls if "UPDATE" in c[0]]
833
+ assert len(update_calls) == 1
834
+ assert "%s" in update_calls[0][0]
835
+ assert "?" not in update_calls[0][0]
836
+
837
+
838
+ class TestUpdateFieldFiltering:
839
+ """Test that UPDATE only targets fields present in db_load.fields."""
840
+
841
+ def _setup_two_snapshots(self, tmp_path, source_id, old_records, new_records):
842
+ source_dir = get_source_dir(tmp_path / "data", source_id)
843
+ write_parquet(old_records, source_dir / "2026-01-01.parquet")
844
+ write_parquet(new_records, source_dir / "2026-01-02.parquet")
845
+
846
+ def test_update_only_db_load_fields(self, tmp_path):
847
+ """UPDATE should skip fields not in db_load.fields."""
848
+ sources = [
849
+ DatasetSource(
850
+ id="items", endpoint="/api/items",
851
+ db_load=DbLoadConfig(key="name", fields=["name", "score"]),
852
+ ),
853
+ ]
854
+ config = _make_config(tmp_path, sources)
855
+
856
+ self._setup_two_snapshots(
857
+ tmp_path, "items",
858
+ old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
859
+ new_records=[{"name": "Alice", "score": 95, "extra": "new"}],
860
+ )
861
+
862
+ adapter = _sqlite_adapter()
863
+ with adapter:
864
+ # Full insert only creates columns from db_load.fields
865
+ source_dir = get_source_dir(tmp_path / "data", "items")
866
+ loader = DatasetDbLoader(config, adapter)
867
+ loader._full_insert(
868
+ sources[0], "items", source_dir / "2026-01-01.parquet"
869
+ )
870
+
871
+ # Table should only have id, name, score (no extra)
872
+ schema = adapter.get_table_schema("items")
873
+ col_names = [c["name"] for c in schema]
874
+ assert "extra" not in col_names
875
+ assert "score" in col_names
876
+
877
+ # Diff sync — extra changed but should be skipped
878
+ loader2 = DatasetDbLoader(config, adapter)
879
+ results = loader2.load_all()
880
+ assert results["items"] == 1 # score changed
881
+
882
+ rows = adapter.fetch_all("SELECT * FROM items")
883
+ assert rows[0]["score"] == 95
884
+
885
+ def test_update_with_dot_notation_alias(self, tmp_path):
886
+ """UPDATE uses correct DB column name for aliased dot-notation fields."""
887
+ sources = [
888
+ DatasetSource(
889
+ id="items", endpoint="/api/items",
890
+ db_load=DbLoadConfig(
891
+ key="meta.id",
892
+ fields=["meta.id AS meta_id", "text", "count"],
893
+ ),
894
+ ),
895
+ ]
896
+ config = _make_config(tmp_path, sources)
897
+
898
+ self._setup_two_snapshots(
899
+ tmp_path, "items",
900
+ old_records=[
901
+ {"meta": json.dumps({"id": "x1"}), "text": "hello", "count": 5, "other": "a"},
902
+ ],
903
+ new_records=[
904
+ {"meta": json.dumps({"id": "x1"}), "text": "updated", "count": 10, "other": "b"},
905
+ ],
906
+ )
907
+
908
+ adapter = _sqlite_adapter()
909
+ with adapter:
910
+ source_dir = get_source_dir(tmp_path / "data", "items")
911
+ loader = DatasetDbLoader(config, adapter)
912
+ loader._full_insert(
913
+ sources[0], "items", source_dir / "2026-01-01.parquet"
914
+ )
915
+
916
+ rows = adapter.fetch_all("SELECT * FROM items")
917
+ assert rows[0]["meta_id"] == "x1"
918
+ assert rows[0]["text"] == "hello"
919
+
920
+ # Diff sync — text and count changed, other should be skipped
921
+ loader2 = DatasetDbLoader(config, adapter)
922
+ results = loader2.load_all()
923
+
924
+ rows = adapter.fetch_all("SELECT * FROM items")
925
+ assert rows[0]["text"] == "updated"
926
+ assert rows[0]["count"] == 10
927
+
928
+ def test_update_skipped_when_no_db_fields_changed(self, tmp_path):
929
+ """If only non-DB fields changed, no UPDATE should happen."""
930
+ sources = [
931
+ DatasetSource(
932
+ id="items", endpoint="/api/items",
933
+ db_load=DbLoadConfig(key="name", fields=["name", "score"]),
934
+ ),
935
+ ]
936
+ config = _make_config(tmp_path, sources)
937
+
938
+ self._setup_two_snapshots(
939
+ tmp_path, "items",
940
+ old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
941
+ # score unchanged, only extra changed
942
+ new_records=[{"name": "Alice", "score": 90, "extra": "new"}],
943
+ )
944
+
945
+ adapter = _sqlite_adapter()
946
+ with adapter:
947
+ source_dir = get_source_dir(tmp_path / "data", "items")
948
+ loader = DatasetDbLoader(config, adapter)
949
+ loader._full_insert(
950
+ sources[0], "items", source_dir / "2026-01-01.parquet"
951
+ )
952
+
953
+ loader2 = DatasetDbLoader(config, adapter)
954
+ results = loader2.load_all()
955
+ # extra is not in db_load.fields, so no actual update
956
+ assert results["items"] == 0
957
+
958
+ rows = adapter.fetch_all("SELECT * FROM items")
959
+ assert len(rows) == 1
960
+ assert rows[0]["score"] == 90
File without changes
File without changes
File without changes
File without changes