anysite-cli 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anysite-cli might be problematic. Click here for more details.

Files changed (118)
  1. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/PKG-INFO +1 -1
  2. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/pyproject.toml +1 -1
  3. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/db_loader.py +48 -4
  4. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_db_loader.py +125 -0
  5. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/.claude/settings.local.json +0 -0
  6. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/.gitignore +0 -0
  7. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/CLAUDE.md +0 -0
  8. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/LICENSE +0 -0
  9. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/README.md +0 -0
  10. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/skills/anysite-cli/SKILL.md +0 -0
  11. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/skills/anysite-cli/references/api-reference.md +0 -0
  12. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/skills/anysite-cli/references/dataset-guide.md +0 -0
  13. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/__init__.py +0 -0
  14. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/__main__.py +0 -0
  15. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/api/__init__.py +0 -0
  16. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/api/client.py +0 -0
  17. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/api/errors.py +0 -0
  18. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/api/schemas.py +0 -0
  19. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/batch/__init__.py +0 -0
  20. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/batch/executor.py +0 -0
  21. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/batch/input.py +0 -0
  22. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/batch/rate_limiter.py +0 -0
  23. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/cli/__init__.py +0 -0
  24. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/cli/config.py +0 -0
  25. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/cli/executor.py +0 -0
  26. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/cli/options.py +0 -0
  27. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/config/__init__.py +0 -0
  28. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/config/paths.py +0 -0
  29. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/config/settings.py +0 -0
  30. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/__init__.py +0 -0
  31. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/analyzer.py +0 -0
  32. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/cli.py +0 -0
  33. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/collector.py +0 -0
  34. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/differ.py +0 -0
  35. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/errors.py +0 -0
  36. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/exporters.py +0 -0
  37. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/history.py +0 -0
  38. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/models.py +0 -0
  39. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/notifications.py +0 -0
  40. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/scheduler.py +0 -0
  41. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/storage.py +0 -0
  42. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/dataset/transformer.py +0 -0
  43. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/__init__.py +0 -0
  44. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/adapters/__init__.py +0 -0
  45. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/adapters/base.py +0 -0
  46. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/adapters/postgres.py +0 -0
  47. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/adapters/sqlite.py +0 -0
  48. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/cli.py +0 -0
  49. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/config.py +0 -0
  50. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/manager.py +0 -0
  51. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/operations/__init__.py +0 -0
  52. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/operations/insert.py +0 -0
  53. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/operations/query.py +0 -0
  54. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/schema/__init__.py +0 -0
  55. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/schema/inference.py +0 -0
  56. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/schema/types.py +0 -0
  57. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/utils/__init__.py +0 -0
  58. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/db/utils/sanitize.py +0 -0
  59. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/main.py +0 -0
  60. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/models/__init__.py +0 -0
  61. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/output/__init__.py +0 -0
  62. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/output/console.py +0 -0
  63. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/output/formatters.py +0 -0
  64. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/output/templates.py +0 -0
  65. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/py.typed +0 -0
  66. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/streaming/__init__.py +0 -0
  67. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/streaming/progress.py +0 -0
  68. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/streaming/writer.py +0 -0
  69. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/utils/__init__.py +0 -0
  70. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/utils/fields.py +0 -0
  71. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/src/anysite/utils/retry.py +0 -0
  72. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/enriched_partners_sample_10.csv +0 -0
  73. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/linkedin-partners/company_aliases.txt +0 -0
  74. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/linkedin-partners/dataset.yaml +0 -0
  75. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/partners-deep/dataset.yaml +0 -0
  76. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/partners-intel/dataset.yaml +0 -0
  77. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/partners-linkedin/company_aliases.txt +0 -0
  78. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/partners-linkedin/dataset.yaml +0 -0
  79. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/test_data/partners-pipeline/dataset.yaml +0 -0
  80. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/__init__.py +0 -0
  81. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/conftest.py +0 -0
  82. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_api/__init__.py +0 -0
  83. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_batch/__init__.py +0 -0
  84. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_batch/test_executor.py +0 -0
  85. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_batch/test_input.py +0 -0
  86. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_batch/test_rate_limiter.py +0 -0
  87. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_cli/__init__.py +0 -0
  88. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_cli/test_main.py +0 -0
  89. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/__init__.py +0 -0
  90. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_analyzer.py +0 -0
  91. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_collector.py +0 -0
  92. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_differ.py +0 -0
  93. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_exporters.py +0 -0
  94. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_history.py +0 -0
  95. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_integration_csv.py +0 -0
  96. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_models.py +0 -0
  97. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_notifications.py +0 -0
  98. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_scheduler.py +0 -0
  99. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_storage.py +0 -0
  100. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_dataset/test_transformer.py +0 -0
  101. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/__init__.py +0 -0
  102. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_cli.py +0 -0
  103. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_config.py +0 -0
  104. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_inference.py +0 -0
  105. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_insert.py +0 -0
  106. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_manager.py +0 -0
  107. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_postgres_adapter.py +0 -0
  108. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_sanitize.py +0 -0
  109. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_db/test_sqlite_adapter.py +0 -0
  110. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_output/__init__.py +0 -0
  111. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_output/test_formatters.py +0 -0
  112. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_output/test_templates.py +0 -0
  113. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_streaming/__init__.py +0 -0
  114. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_streaming/test_progress.py +0 -0
  115. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_streaming/test_writer.py +0 -0
  116. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_utils/__init__.py +0 -0
  117. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_utils/test_fields.py +0 -0
  118. {anysite_cli-0.1.6 → anysite_cli-0.1.7}/tests/test_utils/test_retry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anysite-cli
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: CLI for Anysite API - web data extraction for humans and AI agents
5
5
  Project-URL: Homepage, https://anysite.io
6
6
  Project-URL: Documentation, https://docs.anysite.io/cli
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "anysite-cli"
7
- version = "0.1.6"
7
+ version = "0.1.7"
8
8
  description = "CLI for Anysite API - web data extraction for humans and AI agents"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -315,8 +315,14 @@ class DatasetDbLoader:
315
315
  return _extract_dot_value(record, diff_key)
316
316
  return record.get(diff_key)
317
317
 
318
+ # Build field mapping for db_load.fields filtering
319
+ field_mapping = self._get_db_field_mapping(source)
320
+
318
321
  # Determine the DB column name for the key
319
- db_key_col = diff_key.replace(".", "_")
322
+ if field_mapping and diff_key in field_mapping:
323
+ db_key_col = field_mapping[diff_key]
324
+ else:
325
+ db_key_col = diff_key.replace(".", "_")
320
326
 
321
327
  # INSERT added records
322
328
  if result.added:
@@ -349,15 +355,29 @@ class DatasetDbLoader:
349
355
  if not changed_fields:
350
356
  continue
351
357
 
352
- # Build SET clause from changed fields
358
+ # Build SET clause — only include fields that exist in the DB
353
359
  set_parts = []
354
360
  params: list[Any] = []
355
361
  for field_name in changed_fields:
356
- new_val = record.get(field_name)
357
- safe_field = sanitize_identifier(field_name)
362
+ if field_mapping is not None:
363
+ if field_name not in field_mapping:
364
+ continue
365
+ db_col = field_mapping[field_name]
366
+ else:
367
+ db_col = field_name
368
+
369
+ if "." in field_name:
370
+ new_val = _extract_dot_value(record, field_name)
371
+ else:
372
+ new_val = record.get(field_name)
373
+
374
+ safe_field = sanitize_identifier(db_col)
358
375
  set_parts.append(f"{safe_field} = {ph}")
359
376
  params.append(new_val)
360
377
 
378
+ if not set_parts:
379
+ continue
380
+
361
381
  params.append(str(key_val))
362
382
  sql = (
363
383
  f"UPDATE {table_name} "
@@ -376,6 +396,30 @@ class DatasetDbLoader:
376
396
  return other.dependency.field
377
397
  return None
378
398
 
399
+ def _get_db_field_mapping(self, source: DatasetSource) -> dict[str, str] | None:
400
+ """Build mapping of parquet_field -> db_column from db_load.fields.
401
+
402
+ Returns None if no explicit fields configured (all fields allowed).
403
+ """
404
+ db_load = source.db_load
405
+ if not db_load or not db_load.fields:
406
+ return None
407
+
408
+ mapping: dict[str, str] = {}
409
+ for field_spec in db_load.fields:
410
+ alias = None
411
+ upper = field_spec.upper()
412
+ as_idx = upper.find(" AS ")
413
+ if as_idx != -1:
414
+ alias = field_spec[as_idx + 4:].strip()
415
+ source_field = field_spec[:as_idx].strip()
416
+ else:
417
+ source_field = field_spec
418
+
419
+ col_name = alias or source_field.replace(".", "_")
420
+ mapping[source_field] = col_name
421
+ return mapping
422
+
379
423
  def _placeholder(self) -> str:
380
424
  """Get the parameter placeholder for the dialect."""
381
425
  if self._dialect == "postgres":
@@ -833,3 +833,128 @@ class TestPostgresPlaceholders:
833
833
  assert len(update_calls) == 1
834
834
  assert "%s" in update_calls[0][0]
835
835
  assert "?" not in update_calls[0][0]
836
+
837
+
838
+ class TestUpdateFieldFiltering:
839
+ """Test that UPDATE only targets fields present in db_load.fields."""
840
+
841
+ def _setup_two_snapshots(self, tmp_path, source_id, old_records, new_records):
842
+ source_dir = get_source_dir(tmp_path / "data", source_id)
843
+ write_parquet(old_records, source_dir / "2026-01-01.parquet")
844
+ write_parquet(new_records, source_dir / "2026-01-02.parquet")
845
+
846
+ def test_update_only_db_load_fields(self, tmp_path):
847
+ """UPDATE should skip fields not in db_load.fields."""
848
+ sources = [
849
+ DatasetSource(
850
+ id="items", endpoint="/api/items",
851
+ db_load=DbLoadConfig(key="name", fields=["name", "score"]),
852
+ ),
853
+ ]
854
+ config = _make_config(tmp_path, sources)
855
+
856
+ self._setup_two_snapshots(
857
+ tmp_path, "items",
858
+ old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
859
+ new_records=[{"name": "Alice", "score": 95, "extra": "new"}],
860
+ )
861
+
862
+ adapter = _sqlite_adapter()
863
+ with adapter:
864
+ # Full insert only creates columns from db_load.fields
865
+ source_dir = get_source_dir(tmp_path / "data", "items")
866
+ loader = DatasetDbLoader(config, adapter)
867
+ loader._full_insert(
868
+ sources[0], "items", source_dir / "2026-01-01.parquet"
869
+ )
870
+
871
+ # Table should only have id, name, score (no extra)
872
+ schema = adapter.get_table_schema("items")
873
+ col_names = [c["name"] for c in schema]
874
+ assert "extra" not in col_names
875
+ assert "score" in col_names
876
+
877
+ # Diff sync — extra changed but should be skipped
878
+ loader2 = DatasetDbLoader(config, adapter)
879
+ results = loader2.load_all()
880
+ assert results["items"] == 1 # score changed
881
+
882
+ rows = adapter.fetch_all("SELECT * FROM items")
883
+ assert rows[0]["score"] == 95
884
+
885
+ def test_update_with_dot_notation_alias(self, tmp_path):
886
+ """UPDATE uses correct DB column name for aliased dot-notation fields."""
887
+ sources = [
888
+ DatasetSource(
889
+ id="items", endpoint="/api/items",
890
+ db_load=DbLoadConfig(
891
+ key="meta.id",
892
+ fields=["meta.id AS meta_id", "text", "count"],
893
+ ),
894
+ ),
895
+ ]
896
+ config = _make_config(tmp_path, sources)
897
+
898
+ self._setup_two_snapshots(
899
+ tmp_path, "items",
900
+ old_records=[
901
+ {"meta": json.dumps({"id": "x1"}), "text": "hello", "count": 5, "other": "a"},
902
+ ],
903
+ new_records=[
904
+ {"meta": json.dumps({"id": "x1"}), "text": "updated", "count": 10, "other": "b"},
905
+ ],
906
+ )
907
+
908
+ adapter = _sqlite_adapter()
909
+ with adapter:
910
+ source_dir = get_source_dir(tmp_path / "data", "items")
911
+ loader = DatasetDbLoader(config, adapter)
912
+ loader._full_insert(
913
+ sources[0], "items", source_dir / "2026-01-01.parquet"
914
+ )
915
+
916
+ rows = adapter.fetch_all("SELECT * FROM items")
917
+ assert rows[0]["meta_id"] == "x1"
918
+ assert rows[0]["text"] == "hello"
919
+
920
+ # Diff sync — text and count changed, other should be skipped
921
+ loader2 = DatasetDbLoader(config, adapter)
922
+ results = loader2.load_all()
923
+
924
+ rows = adapter.fetch_all("SELECT * FROM items")
925
+ assert rows[0]["text"] == "updated"
926
+ assert rows[0]["count"] == 10
927
+
928
+ def test_update_skipped_when_no_db_fields_changed(self, tmp_path):
929
+ """If only non-DB fields changed, no UPDATE should happen."""
930
+ sources = [
931
+ DatasetSource(
932
+ id="items", endpoint="/api/items",
933
+ db_load=DbLoadConfig(key="name", fields=["name", "score"]),
934
+ ),
935
+ ]
936
+ config = _make_config(tmp_path, sources)
937
+
938
+ self._setup_two_snapshots(
939
+ tmp_path, "items",
940
+ old_records=[{"name": "Alice", "score": 90, "extra": "old"}],
941
+ # score unchanged, only extra changed
942
+ new_records=[{"name": "Alice", "score": 90, "extra": "new"}],
943
+ )
944
+
945
+ adapter = _sqlite_adapter()
946
+ with adapter:
947
+ source_dir = get_source_dir(tmp_path / "data", "items")
948
+ loader = DatasetDbLoader(config, adapter)
949
+ loader._full_insert(
950
+ sources[0], "items", source_dir / "2026-01-01.parquet"
951
+ )
952
+
953
+ loader2 = DatasetDbLoader(config, adapter)
954
+ results = loader2.load_all()
955
+ # extra is not in db_load.fields, so no actual update
956
+ assert results["items"] == 0
957
+
958
+ rows = adapter.fetch_all("SELECT * FROM items")
959
+ assert len(rows) == 1
960
+ assert rows[0]["score"] == 90
File without changes
File without changes
File without changes
File without changes