data-collection-framework 0.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data_collection_framework-0.1.0.dist-info/METADATA +19 -0
  2. data_collection_framework-0.1.0.dist-info/RECORD +44 -0
  3. data_collection_framework-0.1.0.dist-info/WHEEL +5 -0
  4. data_collection_framework-0.1.0.dist-info/entry_points.txt +2 -0
  5. data_collection_framework-0.1.0.dist-info/top_level.txt +1 -0
  6. dcf/__init__.py +4 -0
  7. dcf/cli.py +841 -0
  8. dcf/config/__init__.py +4 -0
  9. dcf/config/loader.py +77 -0
  10. dcf/config/models.py +240 -0
  11. dcf/engine/__init__.py +6 -0
  12. dcf/engine/fetcher.py +118 -0
  13. dcf/engine/iterator.py +96 -0
  14. dcf/engine/projector.py +56 -0
  15. dcf/engine/runner.py +90 -0
  16. dcf/engine/transforms.py +41 -0
  17. dcf/gcp/__init__.py +0 -0
  18. dcf/gcp/_collector_utils.py +87 -0
  19. dcf/gcp/auth.py +1 -0
  20. dcf/gcp/batch_deploy.py +548 -0
  21. dcf/gcp/bootstrap.py +131 -0
  22. dcf/gcp/gcloud.py +42 -0
  23. dcf/gcp/terraform.py +151 -0
  24. dcf/infra/modules/batch_collector/gcp/airflow/main.tf +194 -0
  25. dcf/infra/modules/batch_collector/gcp/airflow/outputs.tf +9 -0
  26. dcf/infra/modules/batch_collector/gcp/airflow/variables.tf +52 -0
  27. dcf/infra/modules/batch_collector/gcp/main.tf +70 -0
  28. dcf/infra/modules/batch_collector/gcp/outputs.tf +4 -0
  29. dcf/infra/modules/batch_collector/gcp/variables.tf +40 -0
  30. dcf/infra/modules/batch_collector/local/airflow/main.tf +64 -0
  31. dcf/infra/modules/batch_collector/local/airflow/outputs.tf +9 -0
  32. dcf/infra/modules/batch_collector/local/airflow/variables.tf +59 -0
  33. dcf/infra/modules/batch_collector/local/main.tf +32 -0
  34. dcf/infra/modules/batch_collector/local/outputs.tf +4 -0
  35. dcf/infra/modules/batch_collector/local/variables.tf +25 -0
  36. dcf/infra/templates/airflow.Dockerfile.tftpl +6 -0
  37. dcf/infra/templates/batch_collector.Dockerfile.tftpl +14 -0
  38. dcf/infra/templates/docker-compose.yml.tftpl +76 -0
  39. dcf/local_deploy.py +756 -0
  40. dcf/project.py +23 -0
  41. dcf/spark_session.py +66 -0
  42. dcf/warehouse_reader.py +323 -0
  43. dcf/writer/__init__.py +3 -0
  44. dcf/writer/iceberg.py +315 -0
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-collection-framework
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.12
5
+ Requires-Dist: pyspark==4.0.2
6
+ Requires-Dist: pandas
7
+ Requires-Dist: requests
8
+ Requires-Dist: pyarrow
9
+ Requires-Dist: pytz
10
+ Requires-Dist: pydantic>=2
11
+ Requires-Dist: typer[all]>=0.9
12
+ Requires-Dist: google-auth>=2.29
13
+ Requires-Dist: google-cloud-storage>=2.16
14
+ Requires-Dist: google-cloud-secret-manager>=2.20
15
+ Requires-Dist: kafka-python>=2.0
16
+ Requires-Dist: google-api-python-client>=2.126
17
+ Requires-Dist: pyyaml>=6.0
18
+ Requires-Dist: mcp>=1.0
19
+ Requires-Dist: duckdb>=1.0
@@ -0,0 +1,44 @@
1
+ dcf/__init__.py,sha256=Ho8RmXNVjGjQTqCfWWtH70yn1KQYTuqbo_kvY-CgVTE,167
2
+ dcf/cli.py,sha256=XQyX_xlEWMjA5c8xNoMv2HQCcWn0SKtVQEP5BELWdBk,32076
3
+ dcf/local_deploy.py,sha256=i7VX5_2Y2M6IrGoNtqLNHmGrSPNw67IpNdFNtwURSAY,27404
4
+ dcf/project.py,sha256=Wu35GAAtoOCQeWgkFXtUEb5P-zDkuu3NyuuneQRC9kE,729
5
+ dcf/spark_session.py,sha256=2NbmlP85JsQDFuIuvu6NKysp9yuUoonTG_3vtfvRL8E,2466
6
+ dcf/warehouse_reader.py,sha256=E6RBPRXfqCkWicOfYOD1Dmktqk1ptcb4xjrmu7evQIY,11639
7
+ dcf/config/__init__.py,sha256=LD-_Kb5WHdphKsEd7adl9JLJDZqxj_LFg5hY7DhVyfg,152
8
+ dcf/config/loader.py,sha256=Mp-dpXVtEbGk2bPC9GCNclOLY-LH_Qk6eEo2JbCzdB8,2474
9
+ dcf/config/models.py,sha256=gxUXpqKoFcHAwG4VDu69LMKhyHMtVwTIi-JZuVPuy0Q,8457
10
+ dcf/engine/__init__.py,sha256=LTC0Jxq0mtk5WXX8yPDbIs5dTB6HTujraBrYTYCp6M0,228
11
+ dcf/engine/fetcher.py,sha256=LNJNkwplGSw8L8I248ZB3P05GatPea6e9Lsrg1SAsqg,3931
12
+ dcf/engine/iterator.py,sha256=MytFnb-8slKiwILxb6LPtljZiXtlODLQtPISvnQ6ULU,3095
13
+ dcf/engine/projector.py,sha256=1O8Ublw4WEVB_RDa7Q1hRLZsK9TxjUXACBZpIXu1o4s,1592
14
+ dcf/engine/runner.py,sha256=1Luua20tetrzlqB50Z-UkPQkwtSNM3576A6SQbWBoNo,3388
15
+ dcf/engine/transforms.py,sha256=U9cCm30SRvKw3hrw7rwLOR0wV2_c18wSUokK7sW5kV4,1340
16
+ dcf/gcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ dcf/gcp/_collector_utils.py,sha256=B59gZ-ot9oEBlxxZxGwIMYaP0NVUZtL62sWAZfP841Q,2568
18
+ dcf/gcp/auth.py,sha256=xJCUlKDPAH7hyUgnPKZc4Oku9SSSDnQy3FBborygEZE,32
19
+ dcf/gcp/batch_deploy.py,sha256=9mlVBWzTUWr5hWQ8dLCUlxddv7W1P5gLtO_00ZQmYSY,19611
20
+ dcf/gcp/bootstrap.py,sha256=4msf67tF0azW7DsSI7u727Zll_gdpr_kh2fcvh2NUvA,5372
21
+ dcf/gcp/gcloud.py,sha256=3IAofCvEUCa7yQEQe677qWgtqy7khro_Uj-dtGwjZ4o,1378
22
+ dcf/gcp/terraform.py,sha256=zcc3HPiOAhLuRdyFM5xu-8-nH4MzW-Bak4X7PS7U53Y,4884
23
+ dcf/infra/modules/batch_collector/gcp/main.tf,sha256=XktjqXe1TUfBW1NwYPwowP5tgLuVlGmtWBmMRPrInss,1440
24
+ dcf/infra/modules/batch_collector/gcp/outputs.tf,sha256=fzp2UZksVL5XoZkUoppLZbC8yVL6HX-yLbmR7ObXP-A,133
25
+ dcf/infra/modules/batch_collector/gcp/variables.tf,sha256=cMxOWXwCI4tS6ouiwoUaaEUOH9tj_zUnNDrrx6ye8yM,925
26
+ dcf/infra/modules/batch_collector/gcp/airflow/main.tf,sha256=PQpSfw9-QMvD7dOTewNDT1IwZIk-ipxmryT92p5cc_0,3965
27
+ dcf/infra/modules/batch_collector/gcp/airflow/outputs.tf,sha256=P6YzhlkyNHj48NfKu7Ii_yN9aS1kU7WSN6YdbWrnTsA,284
28
+ dcf/infra/modules/batch_collector/gcp/airflow/variables.tf,sha256=_MdTnpqa8muNwIxHYS9NwDSLhjtzjOfnKgvj8aEsL_A,1191
29
+ dcf/infra/modules/batch_collector/local/main.tf,sha256=uQPs2pqvksPf5CxJ42zKjOGyQnFYbRncxTsyjbFDoCk,681
30
+ dcf/infra/modules/batch_collector/local/outputs.tf,sha256=9GergJ-CUrleBRZQEeL9WaHXzvw1LgNdfXTnqVAw1GE,103
31
+ dcf/infra/modules/batch_collector/local/variables.tf,sha256=RuvO1XD2NuEZi1WWFvjUoin5mDogbXfG1if3yarBZH4,645
32
+ dcf/infra/modules/batch_collector/local/airflow/main.tf,sha256=Ig4bQSAL9go9ORuPZX-BB4QKhF0XBzrpXvHhPGSvtEk,1572
33
+ dcf/infra/modules/batch_collector/local/airflow/outputs.tf,sha256=gtrTsLPXu42R703MF5PencrYTZ32kt4dwbeCBJ0ptso,270
34
+ dcf/infra/modules/batch_collector/local/airflow/variables.tf,sha256=oEdy9UqmXbIMCc1AAljSV4puAawC7E7c7RFN4KSFtik,1523
35
+ dcf/infra/templates/airflow.Dockerfile.tftpl,sha256=wTZLcltLsaQmhWy-hMV4zcSkwLEmZhSFn1hPexmGhq8,214
36
+ dcf/infra/templates/batch_collector.Dockerfile.tftpl,sha256=GXxRhbDN7sb6aFqMqXxD31eiAtcZe7QD22EG85RTWjI,413
37
+ dcf/infra/templates/docker-compose.yml.tftpl,sha256=vilo6ciRA7bDf12sykzFR77fTqDgjp8eEthT3y0TECU,1916
38
+ dcf/writer/__init__.py,sha256=REuBkwNC161W5Q-vipTopXv3lym_h3SeXoPcccM7R8g,48
39
+ dcf/writer/iceberg.py,sha256=utpMpsV3sY4H2lxh053ilLY-MPpWM6UspCPNNMh_5hY,10831
40
+ data_collection_framework-0.1.0.dist-info/METADATA,sha256=rzizP783h34ZaSH4BGeadGASiRk-7Wo7wQuTF3sui5I,552
41
+ data_collection_framework-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
42
+ data_collection_framework-0.1.0.dist-info/entry_points.txt,sha256=7Mq6Qrk53kSfUB5LDL5xxf1QYxQnSOyiemcz2ELZyFs,36
43
+ data_collection_framework-0.1.0.dist-info/top_level.txt,sha256=5c4u5AKYDn87yt1HpnQMlSf-1WR0yyaRp1XC0YUJqF0,4
44
+ data_collection_framework-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dcf = dcf.cli:app
dcf/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .engine.runner import run_collector
2
+ from .config import load_collector, load_all_collectors
3
+
4
+ __all__ = ["run_collector", "load_collector", "load_all_collectors"]