ingestflow-sdk 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: ingestflow-sdk
3
+ Version: 1.0.9
4
+ Summary: A Python SDK for data ingestion and tracking
5
+ Author: InduPrakash
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: pandas>=2.2.0
8
+ Requires-Dist: psycopg2-binary>=2.9.9
9
+ Requires-Dist: python-dotenv>=1.0.1
10
+ Dynamic: author
11
+ Dynamic: requires-dist
12
+ Dynamic: requires-python
13
+ Dynamic: summary
@@ -0,0 +1,23 @@
"""Top-level namespace of the ``ingestflow`` package.

Re-exports the SDK's main classes from their subpackages so that callers
can write ``from ingestflow import CsvReader`` rather than importing from
the individual modules.
"""

# The import order below is kept exactly as originally written, in case any
# of these modules depend on another one having been loaded first.
from ingestflow.database.connection import DatabaseConnection
from ingestflow.trackers.metadata_manager import MetadataManager
from ingestflow.logging.logger import Logger
from ingestflow.trackers.file_tracker import FileTracker
from ingestflow.readers.csv_reader import CsvReader
from ingestflow.readers.json_reader import JsonReader
from ingestflow.utils.run_id_generator import RunIdGenerator
from ingestflow.trackers.record_tracker import RecordTracker
from ingestflow.writers.postgres_writer import PostgresWriter
from ingestflow.validators.schema_validator import SchemaValidator

# Names exposed by ``from ingestflow import *``.
__all__ = [
    "DatabaseConnection",
    "MetadataManager",
    "FileTracker",
    "CsvReader",
    "JsonReader",
    "RunIdGenerator",
    "Logger",
    "RecordTracker",
    "PostgresWriter",
    "SchemaValidator",
]
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: ingestflow-sdk
3
+ Version: 1.0.9
4
+ Summary: A Python SDK for data ingestion and tracking
5
+ Author: InduPrakash
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: pandas>=2.2.0
8
+ Requires-Dist: psycopg2-binary>=2.9.9
9
+ Requires-Dist: python-dotenv>=1.0.1
10
+ Dynamic: author
11
+ Dynamic: requires-dist
12
+ Dynamic: requires-python
13
+ Dynamic: summary
@@ -0,0 +1,22 @@
1
+ pyproject.toml
2
+ setup.py
3
+ ingestflow/__init__.py
4
+ ingestflow_sdk.egg-info/PKG-INFO
5
+ ingestflow_sdk.egg-info/SOURCES.txt
6
+ ingestflow_sdk.egg-info/dependency_links.txt
7
+ ingestflow_sdk.egg-info/requires.txt
8
+ ingestflow_sdk.egg-info/top_level.txt
9
+ tests/test_csv_reader.py
10
+ tests/test_data_validator.py
11
+ tests/test_database_connection.py
12
+ tests/test_duplicate_file.py
13
+ tests/test_file_tracker.py
14
+ tests/test_incremental_load.py
15
+ tests/test_json_reader.py
16
+ tests/test_logger.py
17
+ tests/test_merge_load.py
18
+ tests/test_metadata_manager.py
19
+ tests/test_postgres_writer.py
20
+ tests/test_reject_file.py
21
+ tests/test_run_id_generator.py
22
+ tests/test_schema_validator.py
@@ -0,0 +1,3 @@
1
+ pandas>=2.2.0
2
+ psycopg2-binary>=2.9.9
3
+ python-dotenv>=1.0.1
@@ -0,0 +1 @@
1
+ ingestflow
@@ -0,0 +1,6 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=69.0",
4
+ "wheel"
5
+ ]
6
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,24 @@
"""Packaging script for the ``ingestflow-sdk`` distribution."""

from setuptools import find_namespace_packages, setup

setup(
    # Must match the project name registered on PyPI.
    name="ingestflow-sdk",
    version="1.0.9",
    description="A Python SDK for data ingestion and tracking",
    author="InduPrakash",
    # find_packages() only discovers directories that contain an
    # __init__.py. ingestflow/__init__.py imports from subpackages such as
    # ingestflow.readers and ingestflow.trackers, so those must be shipped
    # too; find_namespace_packages() also picks up subdirectories that have
    # no __init__.py. The include filter keeps tests/ and any other
    # top-level directory out of the distribution.
    packages=find_namespace_packages(include=["ingestflow", "ingestflow.*"]),

    # Minimal Python version
    python_requires=">=3.10",

    # Runtime dependencies
    install_requires=[
        "pandas>=2.2.0",
        "psycopg2-binary>=2.9.9",
        "python-dotenv>=1.0.1",
    ],

    # Also ship non-Python files (README, LICENSE, etc.) that are declared
    # as package data.
    include_package_data=True,
)
@@ -0,0 +1,11 @@
"""Smoke test for ``CsvReader``."""

from ingestflow.readers.csv_reader import CsvReader


def test_csv_reader():
    """Read the sample employees CSV and print whatever the reader returns."""
    # Relative path: resolved against the current working directory
    # (presumably the project root -- confirm how the tests are launched).
    sample_path = "tests/source_file/employees.csv"
    loaded = CsvReader.read(sample_path)
    print(loaded)


if __name__ == "__main__":
    test_csv_reader()
@@ -0,0 +1,34 @@
"""Smoke test for ``DataValidator``."""

import pandas as pd
from ingestflow.validators.data_validator import DataValidator


def test_data_validator():
    """Validate a frame with missing values, then request a reject file.

    Prints both frames returned by ``validate_dataframe`` and the value
    returned by ``generate_reject_file``; nothing is asserted.
    """
    # Rows 2, 4 and 5 each have a missing id and/or name.
    sample = pd.DataFrame(
        {
            "id": [1, None, 3, 4, None],
            "name": ["John", None, "Bob", None, "Alice"],
        }
    )

    valid_df, invalid_df = DataValidator.validate_dataframe(sample)
    print("\nVALID DATA")
    print(valid_df)
    print("\nINVALID DATA")
    print(invalid_df)

    reject_file = DataValidator.generate_reject_file(
        invalid_df, "RUN_20260903_001"
    )
    print(reject_file)


if __name__ == "__main__":
    test_data_validator()
@@ -0,0 +1,10 @@
"""Smoke test for ``DatabaseConnection``."""

from ingestflow.database.connection import DatabaseConnection


def test_database_connection():
    """Build a ``DatabaseConnection`` and invoke its ``test_connection``."""
    # NOTE(review): presumably needs a reachable database and connection
    # settings in the environment -- confirm before running in CI.
    connection = DatabaseConnection()
    connection.test_connection()


if __name__ == "__main__":
    test_database_connection()
@@ -0,0 +1,35 @@
"""Test for ``FileTracker.is_duplicate_file``."""

from ingestflow.trackers.file_tracker import FileTracker
from ingestflow.trackers.metadata_manager import MetadataManager


def test_duplicate_file():
    """Hash the sample CSV and check the duplicate lookup yields a boolean."""
    # Create the metadata tables first, before the duplicate lookup runs.
    metadata = MetadataManager()
    metadata.create_metadata_tables()

    tracker = FileTracker()
    # NOTE(review): tests/test_file_tracker.py calls generate_file_hash()
    # while this test calls get_file_hash() -- confirm which name
    # FileTracker actually exposes.
    file_hash = tracker.get_file_hash("tests/source_file/employees.csv")

    result = tracker.is_duplicate_file(file_hash)

    # Either answer is acceptable; only the type of the result is checked.
    assert result in [True, False]
@@ -0,0 +1,13 @@
"""Smoke test for ``FileTracker`` file hashing."""

from ingestflow.trackers.file_tracker import FileTracker


def test_file_hash():
    """Hash the sample employees CSV and print the result."""
    # NOTE(review): tests/test_duplicate_file.py calls get_file_hash() for
    # the same purpose -- confirm which name FileTracker actually exposes.
    digest = FileTracker().generate_file_hash(
        "tests/source_file/employees.csv"
    )
    print(f"File Hash : {digest}")


if __name__ == "__main__":
    test_file_hash()
@@ -0,0 +1,54 @@
"""Test for ``RecordTracker.get_incremental_records``."""

import pandas as pd

from ingestflow.trackers.record_tracker import RecordTracker


def test_incremental_load():
    """Request incremental records for ``employees`` keyed on ``id``.

    Only checks that a result object is returned, not its contents.
    """
    source_df = pd.DataFrame({"id": [1, 2, 3, 4, 5]})

    tracker = RecordTracker()

    # Arguments: source frame, target table name, column used for the
    # incremental comparison.
    incremental_df = tracker.get_incremental_records(
        source_df,
        "employees",
        "id",
    )

    assert incremental_df is not None
@@ -0,0 +1,15 @@
"""Smoke test for ``JsonReader``."""

from ingestflow.readers.json_reader import JsonReader


def test_json_reader():
    """Read the sample employees JSON and print whatever the reader returns."""
    # Relative path: resolved against the current working directory
    # (presumably the project root -- confirm how the tests are launched).
    sample_path = "tests/source_file/employees.json"
    loaded = JsonReader.read(sample_path)
    print(loaded)


if __name__ == "__main__":
    test_json_reader()
@@ -0,0 +1,12 @@
"""Smoke test for ``Logger.print_summary``."""

from ingestflow.logging.logger import Logger

# Sample run summary handed to the logger. Kept at module level so the name
# remains importable exactly as before.
summary = {
    "Run ID": "RUN_20260603_001",
    "File Name": "employees.csv",
    "Load Type": "merge",
    "Inserted Records": 10,
    "Updated Records": 2,
    "Status": "SUCCESS"
}


def test_print_summary():
    """Pass the sample summary to ``Logger.print_summary``.

    The call lives inside a ``test_*`` function so that pytest collects it
    as a test instead of executing it as an import-time side effect.
    """
    Logger.print_summary(summary)


if __name__ == "__main__":
    test_print_summary()
@@ -0,0 +1,40 @@
"""Test for ``RecordTracker.identify_merge_records``."""

import pandas as pd

from ingestflow.trackers.record_tracker import RecordTracker


def test_merge_load():
    """Split a source frame into insert and update sets for ``employees``.

    Prints both returned frames and checks that neither is ``None``.
    """
    incoming = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "name": ["John Updated", "Alice", "Bob"],
        }
    )

    record_tracker = RecordTracker()

    # Arguments: source frame, target table name, list of key columns.
    insert_df, update_df = record_tracker.identify_merge_records(
        incoming, "employees", ["id"]
    )

    print("\nINSERT RECORDS")
    print(insert_df)

    print("\nUPDATE RECORDS")
    print(update_df)

    assert insert_df is not None
    assert update_df is not None
@@ -0,0 +1,13 @@
"""Smoke test for ``MetadataManager``."""

from ingestflow.trackers.metadata_manager import MetadataManager


def test_metadata_tables():
    """Call ``create_metadata_tables`` and print a confirmation message."""
    manager = MetadataManager()
    manager.create_metadata_tables()
    # Reached only if the call above did not raise.
    print("Metadata tables created successfully")


if __name__ == "__main__":
    test_metadata_tables()
@@ -0,0 +1,20 @@
"""Test for ``PostgresWriter``."""

import pandas as pd
from ingestflow.writers.postgres_writer import PostgresWriter


def test_postgres_writer():
    """Create the ``employees`` table if needed and insert two sample rows.

    Asserts only that the value returned by ``insert_records`` is
    non-negative.
    """
    employees = pd.DataFrame(
        {
            "id": [1, 2],
            "name": ["John", "Alice"],
        }
    )

    pg_writer = PostgresWriter()
    pg_writer.create_table_if_not_exists("employees", employees)

    inserted = pg_writer.insert_records("employees", employees)
    assert inserted >= 0
@@ -0,0 +1,23 @@
"""Smoke test for ``DataValidator.generate_reject_file``."""

import pandas as pd
from ingestflow.validators.data_validator import DataValidator


def test_reject_file():
    """Pass one rejected row to ``generate_reject_file`` and print the result."""
    rejected_rows = pd.DataFrame(
        {
            "id": [2],
            "name": ["Alice"],
            "reject_reason": ["Sample Reject"],
        }
    )

    reject_file = DataValidator.generate_reject_file(
        rejected_rows, "RUN_20260603_001"
    )
    print(reject_file)


if __name__ == "__main__":
    test_reject_file()
@@ -0,0 +1,18 @@
"""Smoke test for ``RunIdGenerator``."""

from ingestflow.utils.run_id_generator import RunIdGenerator


def test_run_id():
    """Generate three run ids in a row and print each one."""
    for _ in range(3):
        print(RunIdGenerator.generate())


if __name__ == "__main__":
    test_run_id()
@@ -0,0 +1,20 @@
"""Smoke test for ``SchemaValidator``."""

import pandas as pd
from ingestflow.validators.schema_validator import SchemaValidator


def test_schema_validator():
    """Validate a two-column frame against ``employees`` in ``fail`` mode.

    Prints the value returned by ``validate_schema``; nothing is asserted.
    """
    employees = pd.DataFrame(
        {
            "id": [1, 2],
            "name": ["John", "Alice"],
        }
    )

    outcome = SchemaValidator().validate_schema(
        dataframe=employees,
        table_name="employees",
        schema_mode="fail",
    )
    print(outcome)


if __name__ == "__main__":
    test_schema_validator()