ingestify 0.6.2__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {ingestify-0.6.2 → ingestify-0.6.4}/PKG-INFO +1 -1
  2. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/__init__.py +1 -1
  3. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/dataset/sqlalchemy/repository.py +46 -31
  4. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/PKG-INFO +1 -1
  5. {ingestify-0.6.2 → ingestify-0.6.4}/README.md +0 -0
  6. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/application/__init__.py +0 -0
  7. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/application/dataset_store.py +0 -0
  8. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/application/ingestion_engine.py +0 -0
  9. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/application/loader.py +0 -0
  10. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/application/secrets_manager.py +0 -0
  11. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/cmdline.py +0 -0
  12. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/__init__.py +0 -0
  13. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/__init__.py +0 -0
  14. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/base.py +0 -0
  15. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/data_spec_version_collection.py +0 -0
  16. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/__init__.py +0 -0
  17. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/collection.py +0 -0
  18. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/collection_metadata.py +0 -0
  19. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/dataset.py +0 -0
  20. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/dataset_repository.py +0 -0
  21. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/dataset_state.py +0 -0
  22. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/events.py +0 -0
  23. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/file.py +0 -0
  24. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/file_collection.py +0 -0
  25. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/file_repository.py +0 -0
  26. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/identifier.py +0 -0
  27. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/revision.py +0 -0
  28. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/dataset/selector.py +0 -0
  29. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/__init__.py +0 -0
  30. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/_old_event.py +0 -0
  31. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/dispatcher.py +0 -0
  32. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/domain_event.py +0 -0
  33. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/event_bus.py +0 -0
  34. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/publisher.py +0 -0
  35. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/event/subscriber.py +0 -0
  36. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/fetch_policy.py +0 -0
  37. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/ingestion/__init__.py +0 -0
  38. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/ingestion/ingestion_job.py +0 -0
  39. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/ingestion/ingestion_job_summary.py +0 -0
  40. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/ingestion/ingestion_plan.py +0 -0
  41. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/resources/__init__.py +0 -0
  42. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/resources/dataset_resource.py +0 -0
  43. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/sink.py +0 -0
  44. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/source.py +0 -0
  45. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/task/__init__.py +0 -0
  46. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/task/set.py +0 -0
  47. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/task/task.py +0 -0
  48. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/task/task_summary.py +0 -0
  49. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/models/timing.py +0 -0
  50. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/services/__init__.py +0 -0
  51. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/services/identifier_key_transformer.py +0 -0
  52. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/services/transformers/__init__.py +0 -0
  53. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/domain/services/transformers/kloppy_to_pandas.py +0 -0
  54. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/exceptions.py +0 -0
  55. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/__init__.py +0 -0
  56. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/fetch/__init__.py +0 -0
  57. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/fetch/http.py +0 -0
  58. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/serialization/__init__.py +0 -0
  59. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/sink/__init__.py +0 -0
  60. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/sink/postgresql.py +0 -0
  61. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/source/__init__.py +0 -0
  62. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/source/statsbomb_github.py +0 -0
  63. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/source/wyscout.py +0 -0
  64. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/__init__.py +0 -0
  65. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/dataset/__init__.py +0 -0
  66. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/dataset/sqlalchemy/__init__.py +0 -0
  67. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/dataset/sqlalchemy/tables.py +0 -0
  68. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/file/__init__.py +0 -0
  69. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/file/dummy_file_repository.py +0 -0
  70. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/file/local_file_repository.py +0 -0
  71. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/infra/store/file/s3_file_repository.py +0 -0
  72. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/main.py +0 -0
  73. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/server.py +0 -0
  74. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/source_base.py +0 -0
  75. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/statsbomb_github/README.md +0 -0
  76. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/statsbomb_github/config.yaml.jinja2 +0 -0
  77. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/statsbomb_github/database/README.md +0 -0
  78. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/statsbomb_github/query.py +0 -0
  79. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/.env +0 -0
  80. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/.gitignore +0 -0
  81. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/README.md +0 -0
  82. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/config.yaml.jinja2 +0 -0
  83. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/database/README.md +0 -0
  84. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/static/templates/wyscout/query.py +0 -0
  85. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify/utils.py +0 -0
  86. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/SOURCES.txt +0 -0
  87. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/dependency_links.txt +0 -0
  88. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/entry_points.txt +0 -0
  89. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/requires.txt +0 -0
  90. {ingestify-0.6.2 → ingestify-0.6.4}/ingestify.egg-info/top_level.txt +0 -0
  91. {ingestify-0.6.2 → ingestify-0.6.4}/setup.cfg +0 -0
  92. {ingestify-0.6.2 → ingestify-0.6.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: Data Ingestion Framework
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
@@ -8,4 +8,4 @@ if not __INGESTIFY_SETUP__:
8
8
  from .infra import retrieve_http
9
9
  from .source_base import Source, DatasetResource
10
10
 
11
- __version__ = "0.6.2"
11
+ __version__ = "0.6.4"
@@ -152,7 +152,13 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
152
152
  def dialect(self) -> Dialect:
153
153
  return self.session_provider.dialect
154
154
 
155
- def _upsert(self, connection: Connection, table: Table, entities: list[dict]):
155
+ def _upsert(
156
+ self,
157
+ connection: Connection,
158
+ table: Table,
159
+ entities: list[dict],
160
+ immutable_rows: bool = False,
161
+ ):
156
162
  dialect = self.dialect.name
157
163
  if dialect == "mysql":
158
164
  from sqlalchemy.dialects.mysql import insert
@@ -167,13 +173,18 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
167
173
 
168
174
  primary_key_columns = [column for column in table.columns if column.primary_key]
169
175
 
170
- set_ = {
171
- name: getattr(stmt.excluded, name)
172
- for name, column in table.columns.items()
173
- if column not in primary_key_columns
174
- }
175
-
176
- stmt = stmt.on_conflict_do_update(index_elements=primary_key_columns, set_=set_)
176
+ if immutable_rows:
177
+ stmt = stmt.on_conflict_do_nothing(index_elements=primary_key_columns)
178
+ else:
179
+ set_ = {
180
+ name: getattr(stmt.excluded, name)
181
+ for name, column in table.columns.items()
182
+ if column not in primary_key_columns
183
+ }
184
+
185
+ stmt = stmt.on_conflict_do_update(
186
+ index_elements=primary_key_columns, set_=set_
187
+ )
177
188
 
178
189
  connection.execute(stmt)
179
190
 
@@ -257,33 +268,35 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
257
268
  if not selectors:
258
269
  raise ValueError("Selectors must contain at least one item")
259
270
 
260
- attribute_cte = self._build_cte(
261
- [selector.filtered_attributes for selector in selectors], "attributes"
262
- )
263
-
264
- keys = list(selectors[0].filtered_attributes.keys())
265
271
  first_selector = selectors[0].filtered_attributes
272
+ keys = list(first_selector.keys())
266
273
 
267
- join_conditions = []
268
- for k in keys:
269
- if dialect == "postgresql":
270
- column = dataset_table.c.identifier[k]
274
+ if keys:
275
+ attribute_cte = self._build_cte(
276
+ [selector.filtered_attributes for selector in selectors],
277
+ "attributes",
278
+ )
271
279
 
272
- # Take the value from the first selector to determine the type.
273
- # TODO: check all selectors to determine the type
274
- v = first_selector[k]
275
- if isinstance(v, int):
276
- column = column.as_integer()
280
+ join_conditions = []
281
+ for k in keys:
282
+ if dialect == "postgresql":
283
+ column = dataset_table.c.identifier[k]
284
+
285
+ # Take the value from the first selector to determine the type.
286
+ # TODO: check all selectors to determine the type
287
+ v = first_selector[k]
288
+ if isinstance(v, int):
289
+ column = column.as_integer()
290
+ else:
291
+ column = column.as_string()
277
292
  else:
278
- column = column.as_string()
279
- else:
280
- column = func.json_extract(dataset_table.c.identifier, f"$.{k}")
293
+ column = func.json_extract(dataset_table.c.identifier, f"$.{k}")
281
294
 
282
- join_conditions.append(attribute_cte.c[k] == column)
295
+ join_conditions.append(attribute_cte.c[k] == column)
283
296
 
284
- query = query.select_from(
285
- dataset_table.join(attribute_cte, and_(*join_conditions))
286
- )
297
+ query = query.select_from(
298
+ dataset_table.join(attribute_cte, and_(*join_conditions))
299
+ )
287
300
 
288
301
  if where:
289
302
  query = query.filter(text(where))
@@ -476,8 +489,10 @@ class SqlAlchemyDatasetRepository(DatasetRepository):
476
489
  with self.connect() as connection:
477
490
  try:
478
491
  self._upsert(connection, dataset_table, datasets_entities)
479
- self._upsert(connection, revision_table, revision_entities)
480
- self._upsert(connection, file_table, file_entities)
492
+ self._upsert(
493
+ connection, revision_table, revision_entities, immutable_rows=True
494
+ )
495
+ self._upsert(connection, file_table, file_entities, immutable_rows=True)
481
496
  except Exception:
482
497
  connection.rollback()
483
498
  raise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestify
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: Data Ingestion Framework
5
5
  Author: Koen Vossen
6
6
  Author-email: info@koenvossen.nl
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes