bizon 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ name: hubspot contacts to bigquery (incremental)
2
+
3
+ source:
4
+ name: hubspot
5
+ stream: contacts
6
+ sync_mode: incremental
7
+ cursor_field: updatedAt # HubSpot's timestamp field for filtering
8
+ properties:
9
+ strategy: all
10
+ authentication:
11
+ type: api_key
12
+ api_key: <MY_API_KEY>
13
+
14
+ destination:
15
+ # Authentication: If empty it will be inferred.
16
+ # Must have the bigquery.jobUser
17
+ # Must have the bigquery.dataEditor and storage.objectUser on the supplied dataset and bucket
18
+ name: bigquery
19
+ config:
20
+ buffer_size: 10 # in Mb
21
+ buffer_flush_timeout: 300 # in seconds
22
+ dataset_id: bizon_test
23
+ dataset_location: US
24
+ project_id: my-gcp-project-id
25
+ gcs_buffer_bucket: bizon-buffer
26
+ gcs_buffer_format: parquet
27
+ # Optional: service_account_key for explicit authentication
28
+ # service_account_key: >-
29
+ # { ... }
30
+
31
+ # How incremental sync works:
32
+ # 1. First run: Behaves like full_refresh (fetches all data)
33
+ # 2. Subsequent runs: Only fetches records where cursor_field > last_run
34
+ # 3. Uses append-only strategy - new records are appended to existing data
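Editor's note: the append-only strategy described in the comments above boils down to an `INSERT INTO ... SELECT` from a staging table followed by dropping that table, which is what the BigQuery destination change further down this diff implements. A minimal standalone sketch with the `google-cloud-bigquery` client; the table names are placeholders:

```python
from google.cloud import bigquery

# Placeholders: the pipeline derives these from project_id, dataset_id and the stream name.
table_id = "my-gcp-project-id.bizon_test.hubspot_contacts"
temp_table_id = f"{table_id}_incremental"

client = bigquery.Client()

# Append the records staged during this run to the main table ...
client.query(f"INSERT INTO `{table_id}` SELECT * FROM `{temp_table_id}`").result()

# ... then drop the staging table so the next run starts from a clean slate.
client.delete_table(temp_table_id, not_found_ok=True)
```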
@@ -210,7 +210,11 @@ class BigQueryDestination(AbstractDestination):
210
210
  return True
211
211
 
212
212
  elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL:
213
- # TO DO: Implement incremental sync
213
+ # Append data from incremental temp table to main table
214
+ logger.info(f"Appending data from {self.temp_table_id} to {self.table_id} ...")
215
+ self.bq_client.query(f"INSERT INTO {self.table_id} SELECT * FROM {self.temp_table_id}").result()
216
+ logger.info(f"Deleting incremental temp table {self.temp_table_id} ...")
217
+ self.bq_client.delete_table(self.temp_table_id, not_found_ok=True)
214
218
  return True
215
219
 
216
220
  elif self.sync_metadata.sync_mode == SourceSyncModes.STREAM:
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  import tempfile
3
- from datetime import datetime
4
3
  from typing import List, Tuple
5
4
 
6
5
  import orjson
@@ -162,39 +161,6 @@ class BigQueryStreamingDestination(AbstractDestination):
162
161
  response = write_client.append_rows(iter([request]))
163
162
  return response.code().name
164
163
 
165
- def safe_cast_record_values(self, row: dict):
166
- """
167
- Safe cast record values to the correct type for BigQuery.
168
- """
169
- for col in self.record_schemas[self.destination_id]:
170
- # Handle dicts as strings
171
- if col.type in [BigQueryColumnType.STRING, BigQueryColumnType.JSON]:
172
- if isinstance(row[col.name], dict) or isinstance(row[col.name], list):
173
- row[col.name] = orjson.dumps(row[col.name]).decode("utf-8")
174
-
175
- # Handle timestamps
176
- if (
177
- col.type in [BigQueryColumnType.TIMESTAMP, BigQueryColumnType.DATETIME]
178
- and col.default_value_expression is None
179
- ):
180
- if isinstance(row[col.name], int):
181
- if row[col.name] > datetime(9999, 12, 31).timestamp():
182
- row[col.name] = datetime.fromtimestamp(row[col.name] / 1_000_000).strftime(
183
- "%Y-%m-%d %H:%M:%S.%f"
184
- )
185
- else:
186
- try:
187
- row[col.name] = datetime.fromtimestamp(row[col.name]).strftime("%Y-%m-%d %H:%M:%S.%f")
188
- except ValueError:
189
- error_message = (
190
- f"Error casting timestamp for destination '{self.destination_id}' column '{col.name}'. "
191
- f"Invalid timestamp value: {row[col.name]} ({type(row[col.name])}). "
192
- "Consider using a transformation."
193
- )
194
- logger.error(error_message)
195
- raise ValueError(error_message)
196
- return row
197
-
198
164
  @retry(
199
165
  retry=retry_if_exception_type(
200
166
  (
@@ -281,10 +247,7 @@ class BigQueryStreamingDestination(AbstractDestination):
281
247
 
282
248
  if self.config.unnest:
283
249
  # We cannot use the `json_decode` method here because of the issue: https://github.com/pola-rs/polars/issues/22371
284
- rows_to_insert = [
285
- self.safe_cast_record_values(orjson.loads(row))
286
- for row in df_destination_records["source_data"].to_list()
287
- ]
250
+ rows_to_insert = [orjson.loads(row) for row in df_destination_records["source_data"].to_list()]
288
251
  else:
289
252
  df_destination_records = df_destination_records.with_columns(
290
253
  pl.col("bizon_extracted_at").dt.strftime("%Y-%m-%d %H:%M:%S").alias("bizon_extracted_at"),
@@ -1,7 +1,6 @@
1
1
  import os
2
2
  import tempfile
3
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
- from datetime import datetime
5
4
  from typing import List, Tuple, Type
6
5
 
7
6
  import orjson
@@ -40,6 +39,7 @@ from bizon.destination.destination import AbstractDestination
40
39
  from bizon.engine.backend.backend import AbstractBackend
41
40
  from bizon.monitoring.monitor import AbstractMonitor
42
41
  from bizon.source.callback import AbstractSourceCallback
42
+ from bizon.source.config import SourceSyncModes
43
43
 
44
44
  from .config import BigQueryStreamingV2ConfigDetails
45
45
  from .proto_utils import get_proto_schema_and_class
@@ -81,6 +81,17 @@ class BigQueryStreamingV2Destination(AbstractDestination):
81
81
  tabled_id = f"{self.sync_metadata.source_name}_{self.sync_metadata.stream_name}"
82
82
  return self.destination_id or f"{self.project_id}.{self.dataset_id}.{tabled_id}"
83
83
 
84
+ @property
85
+ def temp_table_id(self) -> str:
86
+ if self.sync_metadata.sync_mode == SourceSyncModes.FULL_REFRESH:
87
+ return f"{self.table_id}_temp"
88
+ elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL:
89
+ return f"{self.table_id}_incremental"
90
+ elif self.sync_metadata.sync_mode == SourceSyncModes.STREAM:
91
+ return f"{self.table_id}"
92
+ # Default fallback
93
+ return f"{self.table_id}"
94
+
84
95
  def get_bigquery_schema(self) -> List[bigquery.SchemaField]:
85
96
  if self.config.unnest:
86
97
  if len(list(self.record_schemas.keys())) == 1:
@@ -165,36 +176,6 @@ class BigQueryStreamingV2Destination(AbstractDestination):
165
176
  logger.error(f"Stream name: {stream_name}")
166
177
  raise
167
178
 
168
- def safe_cast_record_values(self, row: dict):
169
- """
170
- Safe cast record values to the correct type for BigQuery.
171
- """
172
- for col in self.record_schemas[self.destination_id]:
173
- # Handle dicts as strings
174
- if col.type in ["STRING", "JSON"]:
175
- if isinstance(row[col.name], dict) or isinstance(row[col.name], list):
176
- row[col.name] = orjson.dumps(row[col.name]).decode("utf-8")
177
-
178
- # Handle timestamps
179
- if col.type in ["TIMESTAMP", "DATETIME"] and col.default_value_expression is None:
180
- if isinstance(row[col.name], int):
181
- if row[col.name] > datetime(9999, 12, 31).timestamp():
182
- row[col.name] = datetime.fromtimestamp(row[col.name] / 1_000_000).strftime(
183
- "%Y-%m-%d %H:%M:%S.%f"
184
- )
185
- else:
186
- try:
187
- row[col.name] = datetime.fromtimestamp(row[col.name]).strftime("%Y-%m-%d %H:%M:%S.%f")
188
- except ValueError:
189
- error_message = (
190
- f"Error casting timestamp for destination '{self.destination_id}' column '{col.name}'. "
191
- f"Invalid timestamp value: {row[col.name]} ({type(row[col.name])}). "
192
- "Consider using a transformation."
193
- )
194
- logger.error(error_message)
195
- raise ValueError(error_message)
196
- return row
197
-
198
179
  @staticmethod
199
180
  def to_protobuf_serialization(TableRowClass: Type[Message], row: dict) -> bytes:
200
181
  """Convert a row to a Protobuf serialization."""
@@ -263,14 +244,14 @@ class BigQueryStreamingV2Destination(AbstractDestination):
263
244
  deserialized_row = self.from_protobuf_serialization(table_row_class, serialized_row)
264
245
  deserialized_rows.append(deserialized_row)
265
246
 
266
- # For large rows, we need to use the main client
247
+ # For large rows, we need to use the main client (write to temp_table_id)
267
248
  job_config = bigquery.LoadJobConfig(
268
249
  source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
269
- schema=self.bq_client.get_table(self.table_id).schema,
250
+ schema=self.bq_client.get_table(self.temp_table_id).schema,
270
251
  ignore_unknown_values=True,
271
252
  )
272
253
  load_job = self.bq_client.load_table_from_json(
273
- deserialized_rows, self.table_id, job_config=job_config, timeout=300
254
+ deserialized_rows, self.temp_table_id, job_config=job_config, timeout=300
274
255
  )
275
256
  result = load_job.result()
276
257
  if load_job.state != "DONE":
@@ -292,9 +273,9 @@ class BigQueryStreamingV2Destination(AbstractDestination):
292
273
  raise
293
274
 
294
275
  def load_to_bigquery_via_streaming(self, df_destination_records: pl.DataFrame) -> str:
295
- # Create table if it does not exist
276
+ # Create table if it does not exist (use temp_table_id for staging)
296
277
  schema = self.get_bigquery_schema()
297
- table = bigquery.Table(self.table_id, schema=schema)
278
+ table = bigquery.Table(self.temp_table_id, schema=schema)
298
279
  time_partitioning = TimePartitioning(
299
280
  field=self.config.time_partitioning.field, type_=self.config.time_partitioning.type
300
281
  )
@@ -305,7 +286,7 @@ class BigQueryStreamingV2Destination(AbstractDestination):
305
286
  try:
306
287
  table = self.bq_client.create_table(table)
307
288
  except Conflict:
308
- table = self.bq_client.get_table(self.table_id)
289
+ table = self.bq_client.get_table(self.temp_table_id)
309
290
  # Compare and update schema if needed
310
291
  existing_fields = {field.name: field for field in table.schema}
311
292
  new_fields = {field.name: field for field in self.get_bigquery_schema()}
@@ -319,12 +300,13 @@ class BigQueryStreamingV2Destination(AbstractDestination):
319
300
  table.schema = updated_schema
320
301
  table = self.bq_client.update_table(table, ["schema"])
321
302
 
322
- # Create the stream
323
- if self.destination_id:
324
- project, dataset, table_name = self.destination_id.split(".")
303
+ # Create the stream (use temp_table_id for staging)
304
+ temp_table_parts = self.temp_table_id.split(".")
305
+ if len(temp_table_parts) == 3:
306
+ project, dataset, table_name = temp_table_parts
325
307
  parent = BigQueryWriteClient.table_path(project, dataset, table_name)
326
308
  else:
327
- parent = BigQueryWriteClient.table_path(self.project_id, self.dataset_id, self.destination_id)
309
+ parent = BigQueryWriteClient.table_path(self.project_id, self.dataset_id, temp_table_parts[-1])
328
310
 
329
311
  stream_name = f"{parent}/_default"
330
312
 
@@ -333,9 +315,7 @@ class BigQueryStreamingV2Destination(AbstractDestination):
333
315
 
334
316
  if self.config.unnest:
335
317
  serialized_rows = [
336
- self.to_protobuf_serialization(
337
- TableRowClass=TableRow, row=self.safe_cast_record_values(orjson.loads(row))
338
- )
318
+ self.to_protobuf_serialization(TableRowClass=TableRow, row=orjson.loads(row))
339
319
  for row in df_destination_records["source_data"].to_list()
340
320
  ]
341
321
  else:
@@ -442,3 +422,29 @@ class BigQueryStreamingV2Destination(AbstractDestination):
442
422
  if large_rows:
443
423
  logger.warning(f"Yielding large rows batch of {len(large_rows)} rows")
444
424
  yield {"stream_batch": current_batch, "json_batch": large_rows}
425
+
426
+ def finalize(self):
427
+ """Finalize the sync by moving data from temp table to main table based on sync mode."""
428
+ if self.sync_metadata.sync_mode == SourceSyncModes.FULL_REFRESH:
429
+ # Replace main table with temp table data
430
+ logger.info(f"Loading temp table {self.temp_table_id} data into {self.table_id} ...")
431
+ self.bq_client.query(
432
+ f"CREATE OR REPLACE TABLE {self.table_id} AS SELECT * FROM {self.temp_table_id}"
433
+ ).result()
434
+ logger.info(f"Deleting temp table {self.temp_table_id} ...")
435
+ self.bq_client.delete_table(self.temp_table_id, not_found_ok=True)
436
+ return True
437
+
438
+ elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL:
439
+ # Append data from incremental temp table to main table
440
+ logger.info(f"Appending data from {self.temp_table_id} to {self.table_id} ...")
441
+ self.bq_client.query(f"INSERT INTO {self.table_id} SELECT * FROM {self.temp_table_id}").result()
442
+ logger.info(f"Deleting incremental temp table {self.temp_table_id} ...")
443
+ self.bq_client.delete_table(self.temp_table_id, not_found_ok=True)
444
+ return True
445
+
446
+ elif self.sync_metadata.sync_mode == SourceSyncModes.STREAM:
447
+ # Direct writes, no finalization needed
448
+ return True
449
+
450
+ return True
@@ -0,0 +1,22 @@
1
+ name: dummy to file (incremental)
2
+
3
+ source:
4
+ name: dummy
5
+ stream: creatures
6
+ sync_mode: incremental
7
+ cursor_field: updated_at # Field to filter records by timestamp
8
+ authentication:
9
+ type: api_key
10
+ params:
11
+ token: dummy_key
12
+
13
+ destination:
14
+ name: file
15
+ config:
16
+ format: json
17
+
18
+ # How incremental sync works with file destination:
19
+ # 1. First run: Behaves like full_refresh (creates new file)
20
+ # 2. Subsequent runs: Only fetches records where cursor_field > last_run
21
+ # 3. New records are appended to the existing JSON file
22
+ # 4. Writes to temp file (_incremental.json) then appends to main file on finalize
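Editor's note: the temp-file-then-append behaviour described in the last comment is implemented by the FileDestination change later in this diff. A minimal sketch of the pattern on its own, with placeholder paths:

```python
import os

# Placeholder paths: the destination derives them from destination_id and sync_mode.
main_path = "creatures.json"               # main NDJSON output file
temp_path = "creatures_incremental.json"   # staging file written during an incremental run


def finalize_incremental() -> None:
    """Append the records staged during this run to the main file, then drop the staging file."""
    if not os.path.exists(temp_path):
        return
    with open(main_path, "a") as main_file, open(temp_path) as temp_file:
        main_file.write(temp_file.read())
    os.remove(temp_path)


finalize_incremental()
```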
@@ -1,13 +1,17 @@
1
+ import os
2
+ import shutil
1
3
  from typing import Tuple
2
4
 
3
5
  import orjson
4
6
  import polars as pl
7
+ from loguru import logger
5
8
 
6
9
  from bizon.common.models import SyncMetadata
7
10
  from bizon.destination.destination import AbstractDestination
8
11
  from bizon.engine.backend.backend import AbstractBackend
9
12
  from bizon.monitoring.monitor import AbstractMonitor
10
13
  from bizon.source.callback import AbstractSourceCallback
14
+ from bizon.source.config import SourceSyncModes
11
15
 
12
16
  from .config import FileDestinationDetailsConfig
13
17
 
@@ -24,6 +28,30 @@ class FileDestination(AbstractDestination):
24
28
  super().__init__(sync_metadata, config, backend, source_callback, monitor)
25
29
  self.config: FileDestinationDetailsConfig = config
26
30
 
31
+ @property
32
+ def file_path(self) -> str:
33
+ """Main output file path."""
34
+ return f"{self.destination_id}.json"
35
+
36
+ @property
37
+ def temp_file_path(self) -> str:
38
+ """Temp file path for FULL_REFRESH mode."""
39
+ if self.sync_metadata.sync_mode == SourceSyncModes.FULL_REFRESH.value:
40
+ return f"{self.destination_id}_temp.json"
41
+ elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL.value:
42
+ return f"{self.destination_id}_incremental.json"
43
+ return self.file_path
44
+
45
+ @property
46
+ def write_path(self) -> str:
47
+ """Get the path to write to based on sync mode."""
48
+ if self.sync_metadata.sync_mode in [
49
+ SourceSyncModes.FULL_REFRESH.value,
50
+ SourceSyncModes.INCREMENTAL.value,
51
+ ]:
52
+ return self.temp_file_path
53
+ return self.file_path
54
+
27
55
  def check_connection(self) -> bool:
28
56
  return True
29
57
 
@@ -34,7 +62,7 @@ class FileDestination(AbstractDestination):
34
62
  if self.config.unnest:
35
63
  schema_keys = set([column.name for column in self.record_schemas[self.destination_id]])
36
64
 
37
- with open(f"{self.destination_id}.json", "a") as f:
65
+ with open(self.write_path, "a") as f:
38
66
  for value in [orjson.loads(data) for data in df_destination_records["source_data"].to_list()]:
39
67
  assert set(value.keys()) == schema_keys, "Keys do not match the schema"
40
68
 
@@ -46,6 +74,35 @@ class FileDestination(AbstractDestination):
46
74
  f.write(f"{orjson.dumps(row).decode('utf-8')}\n")
47
75
 
48
76
  else:
49
- df_destination_records.write_ndjson(f"{self.destination_id}.json")
77
+ # Append mode for incremental, overwrite for full refresh on first write
78
+ with open(self.write_path, "a") as f:
79
+ for record in df_destination_records.iter_rows(named=True):
80
+ f.write(f"{orjson.dumps(record).decode('utf-8')}\n")
50
81
 
51
82
  return True, ""
83
+
84
+ def finalize(self) -> bool:
85
+ """Finalize the sync by moving temp file to main file based on sync mode."""
86
+ if self.sync_metadata.sync_mode == SourceSyncModes.FULL_REFRESH.value:
87
+ # Replace main file with temp file
88
+ if os.path.exists(self.temp_file_path):
89
+ logger.info(f"File destination: Moving {self.temp_file_path} to {self.file_path}")
90
+ shutil.move(self.temp_file_path, self.file_path)
91
+ return True
92
+
93
+ elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL.value:
94
+ # Append temp file contents to main file
95
+ if os.path.exists(self.temp_file_path):
96
+ logger.info(f"File destination: Appending {self.temp_file_path} to {self.file_path}")
97
+ with open(self.file_path, "a") as main_file:
98
+ with open(self.temp_file_path) as temp_file:
99
+ main_file.write(temp_file.read())
100
+ os.remove(self.temp_file_path)
101
+ return True
102
+
103
+ elif self.sync_metadata.sync_mode == SourceSyncModes.STREAM.value:
104
+ # Direct writes, no finalization needed
105
+ logger.info("File destination: STREAM sync batch completed")
106
+ return True
107
+
108
+ return True
@@ -0,0 +1,21 @@
1
+ name: dummy to logger (incremental)
2
+
3
+ source:
4
+ name: dummy
5
+ stream: creatures
6
+ sync_mode: incremental
7
+ cursor_field: updated_at # Field to filter records by timestamp
8
+ authentication:
9
+ type: api_key
10
+ params:
11
+ token: dummy_key
12
+
13
+ destination:
14
+ name: logger
15
+ config:
16
+ dummy: dummy
17
+
18
+ # How incremental sync works:
19
+ # 1. First run: Behaves like full_refresh (fetches all data)
20
+ # 2. Subsequent runs: Only fetches records where cursor_field > last_run
21
+ # 3. Logger outputs records with [incremental] prefix for easy identification
@@ -8,6 +8,7 @@ from bizon.destination.destination import AbstractDestination
8
8
  from bizon.engine.backend.backend import AbstractBackend
9
9
  from bizon.monitoring.monitor import AbstractMonitor
10
10
  from bizon.source.callback import AbstractSourceCallback
11
+ from bizon.source.config import SourceSyncModes
11
12
 
12
13
  from .config import LoggerDestinationConfig
13
14
 
@@ -36,6 +37,17 @@ class LoggerDestination(AbstractDestination):
36
37
  return True
37
38
 
38
39
  def write_records(self, df_destination_records: pl.DataFrame) -> Tuple[bool, str]:
40
+ sync_mode_label = f"[{self.sync_metadata.sync_mode}]" if self.sync_metadata.sync_mode else ""
39
41
  for record in df_destination_records.iter_rows(named=True):
40
- logger.info(record["source_data"])
42
+ logger.info(f"{sync_mode_label} {record['source_data']}")
41
43
  return True, ""
44
+
45
+ def finalize(self) -> bool:
46
+ """Finalize the sync - logs completion message based on sync mode."""
47
+ if self.sync_metadata.sync_mode == SourceSyncModes.FULL_REFRESH.value:
48
+ logger.info("Logger destination: FULL_REFRESH sync completed")
49
+ elif self.sync_metadata.sync_mode == SourceSyncModes.INCREMENTAL.value:
50
+ logger.info("Logger destination: INCREMENTAL sync completed (records appended)")
51
+ elif self.sync_metadata.sync_mode == SourceSyncModes.STREAM.value:
52
+ logger.info("Logger destination: STREAM sync batch completed")
53
+ return True
@@ -0,0 +1,51 @@
1
+ name: gsheets incremental sync
2
+
3
+ source:
4
+ name: gsheets
5
+ stream: worksheet
6
+ sync_mode: incremental
7
+ cursor_field: updated_at # Column name in your sheet containing timestamps
8
+ spreadsheet_url: <MY_SPREADSHEET_URL>
9
+ worksheet_name: Sheet1
10
+ service_account_key: >-
11
+ {
12
+ "type": "service_account",
13
+ "project_id": "<MY_GCP_PROJECT>",
14
+ "private_key_id": "xxx",
15
+ "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
16
+ "client_email": "bizon@<MY_GCP_PROJECT>.iam.gserviceaccount.com",
17
+ "client_id": "999999999999",
18
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
19
+ "token_uri": "https://oauth2.googleapis.com/token",
20
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
21
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/...",
22
+ "universe_domain": "googleapis.com"
23
+ }
24
+
25
+ destination:
26
+ name: bigquery
27
+ config:
28
+ project_id: <MY_GCP_PROJECT>
29
+ dataset_id: gsheets_data
30
+ dataset_location: US
31
+ gcs_buffer_bucket: <MY_GCS_BUCKET>
32
+ gcs_buffer_format: parquet
33
+
34
+ engine:
35
+ backend:
36
+ type: bigquery
37
+ database: <MY_GCP_PROJECT>
38
+ schema: bizon_backend
39
+ syncCursorInDBEvery: 2
40
+
41
+ # Incremental sync for Google Sheets:
42
+ # - First run: Fetches all rows (full refresh behavior)
43
+ # - Subsequent runs: Only fetches rows where cursor_field > last_run
44
+ #
45
+ # IMPORTANT: Your Google Sheet must have a timestamp column for incremental sync.
46
+ # Common patterns:
47
+ # - Add an "updated_at" column with formula: =NOW() (updates on edit)
48
+ # - Use Google Apps Script to auto-update timestamps on row changes
49
+ # - Manually maintain a "last_modified" column
50
+ #
51
+ # If your sheet doesn't have timestamps, use sync_mode: full_refresh instead.
@@ -0,0 +1,40 @@
1
+ name: hubspot contacts incremental sync
2
+
3
+ source:
4
+ name: hubspot
5
+ stream: contacts
6
+ sync_mode: incremental
7
+ cursor_field: updatedAt # HubSpot's timestamp field for contacts
8
+ properties:
9
+ strategy: all
10
+ authentication:
11
+ type: api_key
12
+ params:
13
+ token: <MY_API_KEY>
14
+
15
+ destination:
16
+ name: bigquery
17
+ config:
18
+ project_id: <MY_GCP_PROJECT>
19
+ dataset_id: hubspot_data
20
+ dataset_location: US
21
+ gcs_buffer_bucket: <MY_GCS_BUCKET>
22
+ gcs_buffer_format: parquet
23
+
24
+ engine:
25
+ backend:
26
+ type: bigquery
27
+ database: <MY_GCP_PROJECT>
28
+ schema: bizon_backend
29
+ syncCursorInDBEvery: 2
30
+
31
+ # Incremental sync for HubSpot:
32
+ # - First run: Fetches all contacts (full refresh behavior)
33
+ # - Subsequent runs: Only fetches contacts where updatedAt > last_run
34
+ #
35
+ # Common cursor fields by stream:
36
+ # - contacts: updatedAt
37
+ # - companies: updatedAt
38
+ # - deals: updatedAt
39
+ # - tickets: updatedAt
40
+ # - products: updatedAt
@@ -0,0 +1,48 @@
1
+ name: notion pages incremental sync
2
+
3
+ source:
4
+ name: notion
5
+ stream: pages # Options: databases, data_sources, pages, blocks, users
6
+ sync_mode: incremental
7
+ cursor_field: last_edited_time # Notion's timestamp field
8
+ authentication:
9
+ type: api_key
10
+ params:
11
+ token: secret_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx # Your Notion integration token
12
+
13
+ # List of database IDs to fetch data from
14
+ database_ids:
15
+ - "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
16
+
17
+ # Number of results per API call (1-100, default: 100)
18
+ page_size: 100
19
+
20
+ destination:
21
+ name: bigquery
22
+ config:
23
+ project_id: <MY_GCP_PROJECT>
24
+ dataset_id: notion_data
25
+ dataset_location: US
26
+ gcs_buffer_bucket: <MY_GCS_BUCKET>
27
+ gcs_buffer_format: parquet
28
+
29
+ engine:
30
+ backend:
31
+ type: bigquery
32
+ database: <MY_GCP_PROJECT>
33
+ schema: bizon_backend
34
+ syncCursorInDBEvery: 2
35
+
36
+ # Incremental sync for Notion:
37
+ # - First run: Fetches all pages/databases (full refresh behavior)
38
+ # - Subsequent runs: Only fetches items where last_edited_time > last_run
39
+ #
40
+ # Supported streams for incremental sync:
41
+ # - pages, all_pages: Uses Search API with last_edited_time filter
42
+ # - databases, all_databases: Uses Search API to find updated data_sources
43
+ # - blocks: First finds updated pages, then fetches their blocks
44
+ # - blocks_markdown, all_blocks_markdown: Same as blocks, converts to markdown
45
+ #
46
+ # Not supported (falls back to full refresh):
47
+ # - users: No timestamp filter available
48
+ # - data_sources: Use databases stream instead
@@ -10,7 +10,7 @@ from urllib3.util.retry import Retry
10
10
  from bizon.source.auth.builder import AuthBuilder
11
11
  from bizon.source.auth.config import AuthType
12
12
  from bizon.source.config import SourceConfig
13
- from bizon.source.models import SourceIteration, SourceRecord
13
+ from bizon.source.models import SourceIncrementalState, SourceIteration, SourceRecord
14
14
  from bizon.source.source import AbstractSource
15
15
 
16
16
  from .config import NotionSourceConfig, NotionStreams
@@ -1132,6 +1132,348 @@ class NotionSource(AbstractSource):
1132
1132
 
1133
1133
  return SourceIteration(records=records, next_pagination=next_pagination)
1134
1134
 
1135
+ # ==================== INCREMENTAL SYNC ====================
1136
+
1137
+ def search_with_filter(
1138
+ self, start_cursor: str = None, last_edited_after: str = None, object_type: str = None
1139
+ ) -> dict:
1140
+ """
1141
+ Search with optional last_edited_time filter for incremental sync.
1142
+
1143
+ Note: Notion Search API doesn't support timestamp filtering directly.
1144
+ We sort by last_edited_time descending and filter client-side.
1145
+
1146
+ Args:
1147
+ start_cursor: Pagination cursor
1148
+ last_edited_after: ISO 8601 timestamp to filter by last_edited_time
1149
+ object_type: Optional filter by object type ("page" or "database")
1150
+
1151
+ Returns:
1152
+ Search results filtered by timestamp
1153
+ """
1154
+ payload = {"page_size": self.config.page_size}
1155
+ if start_cursor:
1156
+ payload["start_cursor"] = start_cursor
1157
+
1158
+ # Sort by last_edited_time descending to get most recent first
1159
+ if last_edited_after:
1160
+ payload["sort"] = {"direction": "descending", "timestamp": "last_edited_time"}
1161
+
1162
+ response = self.session.post(f"{BASE_URL}/search", json=payload)
1163
+ response.raise_for_status()
1164
+ result = response.json()
1165
+
1166
+ # Filter by object_type client-side if specified
1167
+ if object_type:
1168
+ result["results"] = [item for item in result.get("results", []) if item.get("object") == object_type]
1169
+
1170
+ # Filter by last_edited_time client-side
1171
+ # Since results are sorted descending, stop when we hit an old item
1172
+ if last_edited_after:
1173
+ filtered_results = []
1174
+ found_old_item = False
1175
+ for item in result.get("results", []):
1176
+ item_edited_time = item.get("last_edited_time", "")
1177
+ if item_edited_time > last_edited_after:
1178
+ filtered_results.append(item)
1179
+ else:
1180
+ found_old_item = True
1181
+ break
1182
+
1183
+ result["results"] = filtered_results
1184
+ # If we found an old item, no need to paginate further
1185
+ if found_old_item:
1186
+ result["has_more"] = False
1187
+
1188
+ return result
1189
+
1190
+ def get_pages_after(self, source_state: SourceIncrementalState, pagination: dict = None) -> SourceIteration:
1191
+ """
1192
+ Fetch pages updated after source_state.last_run using the Search API with timestamp filter.
1193
+ """
1194
+ cursor = pagination.get("start_cursor") if pagination else None
1195
+ last_edited_after = source_state.last_run.isoformat()
1196
+
1197
+ result = self.search_with_filter(start_cursor=cursor, last_edited_after=last_edited_after, object_type="page")
1198
+
1199
+ records = [SourceRecord(id=page["id"], data=page) for page in result.get("results", [])]
1200
+
1201
+ logger.info(f"Incremental sync: fetched {len(records)} pages updated after {last_edited_after}")
1202
+
1203
+ next_pagination = {"start_cursor": result.get("next_cursor")} if result.get("has_more") else {}
1204
+
1205
+ return SourceIteration(records=records, next_pagination=next_pagination)
1206
+
1207
+ def get_all_pages_after(self, source_state: SourceIncrementalState, pagination: dict = None) -> SourceIteration:
1208
+ """
1209
+ Fetch all pages accessible to the integration updated after source_state.last_run.
1210
+ Same as get_pages_after but without database_ids filter.
1211
+ """
1212
+ return self.get_pages_after(source_state, pagination)
1213
+
1214
+ def get_databases_after(self, source_state: SourceIncrementalState, pagination: dict = None) -> SourceIteration:
1215
+ """
1216
+ Fetch databases updated after source_state.last_run.
1217
+ """
1218
+ cursor = pagination.get("start_cursor") if pagination else None
1219
+ last_edited_after = source_state.last_run.isoformat()
1220
+
1221
+ # Search for data_sources (databases don't appear directly in search in 2025-09-03 API)
1222
+ result = self.search_with_filter(
1223
+ start_cursor=cursor, last_edited_after=last_edited_after, object_type="data_source"
1224
+ )
1225
+
1226
+ # Extract unique database IDs from data_sources
1227
+ seen_db_ids = set()
1228
+ records = []
1229
+ for ds in result.get("results", []):
1230
+ parent = ds.get("parent", {})
1231
+ if parent.get("type") == "database_id":
1232
+ db_id = parent.get("database_id")
1233
+ if db_id and db_id not in seen_db_ids:
1234
+ seen_db_ids.add(db_id)
1235
+ try:
1236
+ db_data = self.get_database(db_id)
1237
+ records.append(SourceRecord(id=db_data["id"], data=db_data))
1238
+ except Exception as e:
1239
+ logger.error(f"Failed to fetch database {db_id}: {e}")
1240
+
1241
+ logger.info(f"Incremental sync: fetched {len(records)} databases updated after {last_edited_after}")
1242
+
1243
+ next_pagination = {"start_cursor": result.get("next_cursor")} if result.get("has_more") else {}
1244
+
1245
+ return SourceIteration(records=records, next_pagination=next_pagination)
1246
+
1247
+ def get_blocks_after(self, source_state: SourceIncrementalState, pagination: dict = None) -> SourceIteration:
1248
+ """
1249
+ Fetch blocks from pages updated after source_state.last_run.
1250
+ First finds updated pages, then fetches their blocks.
1251
+ """
1252
+ if pagination:
1253
+ items_to_process = pagination.get("items_to_process", [])
1254
+ items_loaded = pagination.get("items_loaded", False)
1255
+ search_cursor = pagination.get("search_cursor")
1256
+ else:
1257
+ items_to_process = []
1258
+ items_loaded = False
1259
+ search_cursor = None
1260
+
1261
+ last_edited_after = source_state.last_run.isoformat()
1262
+
1263
+ # Collect pages updated after last_run
1264
+ if not items_loaded:
1265
+ while True:
1266
+ result = self.search_with_filter(
1267
+ start_cursor=search_cursor, last_edited_after=last_edited_after, object_type="page"
1268
+ )
1269
+ for page in result.get("results", []):
1270
+ items_to_process.append(
1271
+ {
1272
+ "block_id": page["id"],
1273
+ "input_db_id": None,
1274
+ "input_page_id": None,
1275
+ "source_page_id": page["id"],
1276
+ }
1277
+ )
1278
+
1279
+ if result.get("has_more"):
1280
+ search_cursor = result.get("next_cursor")
1281
+ else:
1282
+ break
1283
+
1284
+ items_loaded = True
1285
+ logger.info(f"Incremental sync: found {len(items_to_process)} pages updated after {last_edited_after}")
1286
+
1287
+ if not items_to_process:
1288
+ return SourceIteration(records=[], next_pagination={})
1289
+
1290
+ # Process a batch in parallel
1291
+ batch_size = self.config.max_workers
1292
+ batch = items_to_process[:batch_size]
1293
+ items_to_process = items_to_process[batch_size:]
1294
+
1295
+ records = []
1296
+
1297
+ def fetch_item_blocks(item_info: dict) -> List[dict]:
1298
+ return self.fetch_blocks_recursively(
1299
+ block_id=item_info["block_id"],
1300
+ parent_input_database_id=item_info["input_db_id"],
1301
+ parent_input_page_id=item_info["input_page_id"],
1302
+ source_page_id=item_info["source_page_id"],
1303
+ )
1304
+
1305
+ with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
1306
+ futures = {executor.submit(fetch_item_blocks, item_info): item_info for item_info in batch}
1307
+ for future in as_completed(futures):
1308
+ item_info = futures[future]
1309
+ try:
1310
+ blocks = future.result()
1311
+ for block in blocks:
1312
+ records.append(SourceRecord(id=block["id"], data=block))
1313
+ except Exception as e:
1314
+ logger.error(f"Failed to fetch blocks from {item_info['block_id']}: {e}")
1315
+
1316
+ next_pagination = {"items_to_process": items_to_process, "items_loaded": True} if items_to_process else {}
1317
+
1318
+ return SourceIteration(records=records, next_pagination=next_pagination)
1319
+
1320
+ def get_blocks_markdown_after(
1321
+ self, source_state: SourceIncrementalState, pagination: dict = None
1322
+ ) -> SourceIteration:
1323
+ """
1324
+ Fetch blocks from pages updated after source_state.last_run and convert to markdown.
1325
+ Respects database_ids and database_filters configuration.
1326
+ """
1327
+ if pagination:
1328
+ items_to_process = pagination.get("items_to_process", [])
1329
+ items_loaded = pagination.get("items_loaded", False)
1330
+ else:
1331
+ items_to_process = []
1332
+ items_loaded = False
1333
+
1334
+ last_edited_after = source_state.last_run.isoformat()
1335
+
1336
+ # Collect pages updated after last_run from configured databases
1337
+ if not items_loaded:
1338
+ # Query each configured database with timestamp filter
1339
+ for db_id in self.config.database_ids:
1340
+ try:
1341
+ db_data = self.get_database(db_id)
1342
+ db_filter = self.get_filter_for_database(db_id)
1343
+
1344
+ for ds in db_data.get("data_sources", []):
1345
+ ds_cursor = None
1346
+ while True:
1347
+ # Build filter with last_edited_time constraint
1348
+ incremental_filter = {
1349
+ "timestamp": "last_edited_time",
1350
+ "last_edited_time": {"after": last_edited_after},
1351
+ }
1352
+ # Combine with existing database filter if present
1353
+ if db_filter:
1354
+ combined_filter = {"and": [incremental_filter, db_filter]}
1355
+ else:
1356
+ combined_filter = incremental_filter
1357
+
1358
+ result = self.query_data_source(ds["id"], ds_cursor, filter=combined_filter)
1359
+ for page in result.get("results", []):
1360
+ items_to_process.append(
1361
+ {
1362
+ "block_id": page["id"],
1363
+ "input_db_id": db_id,
1364
+ "input_page_id": None,
1365
+ "source_page_id": page["id"],
1366
+ }
1367
+ )
1368
+
1369
+ if result.get("has_more"):
1370
+ ds_cursor = result.get("next_cursor")
1371
+ else:
1372
+ break
1373
+ except Exception as e:
1374
+ logger.error(f"Failed to query database {db_id} for incremental sync: {e}")
1375
+
1376
+ # Also check configured page_ids (filter by last_edited_time)
1377
+ for page_id in self.config.page_ids:
1378
+ try:
1379
+ page_data = self.get_page(page_id)
1380
+ if page_data.get("last_edited_time", "") > last_edited_after:
1381
+ items_to_process.append(
1382
+ {
1383
+ "block_id": page_id,
1384
+ "input_db_id": None,
1385
+ "input_page_id": page_id,
1386
+ "source_page_id": page_id,
1387
+ }
1388
+ )
1389
+ except Exception as e:
1390
+ logger.error(f"Failed to fetch page {page_id} for incremental sync: {e}")
1391
+
1392
+ items_loaded = True
1393
+ logger.info(
1394
+ f"Incremental sync: found {len(items_to_process)} pages for blocks_markdown after {last_edited_after}"
1395
+ )
1396
+
1397
+ if not items_to_process:
1398
+ return SourceIteration(records=[], next_pagination={})
1399
+
1400
+ # Process a batch in parallel
1401
+ batch_size = self.config.max_workers
1402
+ batch = items_to_process[:batch_size]
1403
+ items_to_process = items_to_process[batch_size:]
1404
+
1405
+ records = []
1406
+
1407
+ def fetch_and_convert_item(item_info: dict) -> List[dict]:
1408
+ blocks = self.fetch_blocks_recursively(
1409
+ block_id=item_info["block_id"],
1410
+ parent_input_database_id=item_info["input_db_id"],
1411
+ parent_input_page_id=item_info["input_page_id"],
1412
+ source_page_id=item_info["source_page_id"],
1413
+ fetch_child_databases=False,
1414
+ )
1415
+
1416
+ block_records = []
1417
+ for block in blocks or []:
1418
+ if not block:
1419
+ continue
1420
+ md = self._block_to_markdown(block)
1421
+ block_records.append(
1422
+ {
1423
+ "block_id": block.get("id"),
1424
+ "block_type": block.get("type"),
1425
+ "markdown": md,
1426
+ "source_page_id": block.get("source_page_id"),
1427
+ "parent_block_id": block.get("parent_block_id"),
1428
+ "parent_input_database_id": block.get("parent_input_database_id"),
1429
+ "parent_input_page_id": block.get("parent_input_page_id"),
1430
+ "depth": block.get("depth"),
1431
+ "block_order": block.get("block_order"),
1432
+ "page_order": block.get("page_order"),
1433
+ "block_raw": block,
1434
+ }
1435
+ )
1436
+ return block_records
1437
+
1438
+ with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor:
1439
+ futures = {executor.submit(fetch_and_convert_item, item_info): item_info for item_info in batch}
1440
+ for future in as_completed(futures):
1441
+ item_info = futures[future]
1442
+ try:
1443
+ block_records = future.result()
1444
+ for block_record in block_records:
1445
+ records.append(SourceRecord(id=block_record.get("block_id"), data=block_record))
1446
+ except Exception as e:
1447
+ logger.error(f"Failed to fetch/convert blocks from {item_info['block_id']}: {e}")
1448
+
1449
+ next_pagination = {"items_to_process": items_to_process, "items_loaded": True} if items_to_process else {}
1450
+
1451
+ return SourceIteration(records=records, next_pagination=next_pagination)
1452
+
1453
+ def get_records_after(self, source_state: SourceIncrementalState, pagination: dict = None) -> SourceIteration:
1454
+ """
1455
+ Fetch records updated after source_state.last_run for incremental sync.
1456
+
1457
+ Supported streams:
1458
+ - pages, all_pages: Uses Search API with last_edited_time filter
1459
+ - databases, all_databases: Uses Search API to find updated data_sources
1460
+ - blocks, all_blocks_markdown: First finds updated pages, then fetches their blocks
1461
+ """
1462
+ stream = self.config.stream
1463
+
1464
+ if stream in [NotionStreams.PAGES, NotionStreams.ALL_PAGES]:
1465
+ return self.get_pages_after(source_state, pagination)
1466
+ elif stream in [NotionStreams.DATABASES, NotionStreams.ALL_DATABASES]:
1467
+ return self.get_databases_after(source_state, pagination)
1468
+ elif stream == NotionStreams.BLOCKS:
1469
+ return self.get_blocks_after(source_state, pagination)
1470
+ elif stream in [NotionStreams.BLOCKS_MARKDOWN, NotionStreams.ALL_BLOCKS_MARKDOWN]:
1471
+ return self.get_blocks_markdown_after(source_state, pagination)
1472
+ else:
1473
+ # For streams that don't support incremental, fall back to full refresh
1474
+ logger.warning(f"Stream {stream} does not support incremental sync, falling back to full refresh")
1475
+ return self.get(pagination)
1476
+
1135
1477
  # ==================== MAIN DISPATCH ====================
1136
1478
 
1137
1479
  def get(self, pagination: dict = None) -> SourceIteration:
@@ -14,7 +14,9 @@ from bizon.common.models import BizonConfig
14
14
  from bizon.engine.backend.backend import AbstractBackend
15
15
  from bizon.engine.backend.models import CursorStatus
16
16
  from bizon.engine.queue.queue import AbstractQueue
17
+ from bizon.source.config import SourceSyncModes
17
18
  from bizon.source.cursor import Cursor
19
+ from bizon.source.models import SourceIncrementalState
18
20
  from bizon.source.source import AbstractSource
19
21
 
20
22
  from .models import PipelineReturnStatus
@@ -130,6 +132,37 @@ class Producer:
130
132
  self.queue.terminate(iteration=0)
131
133
  return PipelineReturnStatus.BACKEND_ERROR
132
134
 
135
+ # Handle incremental sync mode
136
+ source_incremental_state = None
137
+ is_incremental = self.bizon_config.source.sync_mode == SourceSyncModes.INCREMENTAL
138
+
139
+ if is_incremental:
140
+ # Get the last successful job to determine last_run timestamp
141
+ last_successful_job = self.backend.get_last_successful_stream_job(
142
+ name=self.bizon_config.name,
143
+ source_name=self.bizon_config.source.name,
144
+ stream_name=self.bizon_config.source.stream,
145
+ )
146
+
147
+ if last_successful_job:
148
+ # Create incremental state with last_run from previous job
149
+ source_incremental_state = SourceIncrementalState(
150
+ last_run=last_successful_job.created_at,
151
+ state={},
152
+ cursor_field=self.bizon_config.source.cursor_field,
153
+ )
154
+ logger.info(
155
+ f"Incremental sync: fetching records after {source_incremental_state.last_run} "
156
+ f"using cursor_field: {source_incremental_state.cursor_field}"
157
+ )
158
+ else:
159
+ # First incremental run - fall back to full refresh behavior
160
+ logger.info(
161
+ "Incremental sync: No previous successful job found. "
162
+ "Falling back to full refresh behavior for first run."
163
+ )
164
+ is_incremental = False
165
+
133
166
  while not cursor.is_finished:
134
167
  if stop_event.is_set():
135
168
  logger.info("Stop event is set, terminating producer ...")
@@ -180,7 +213,15 @@ class Producer:
180
213
 
181
214
  # Get the next data
182
215
  try:
183
- source_iteration = self.source.get(pagination=cursor.pagination)
216
+ if is_incremental and source_incremental_state:
217
+ # Use incremental fetching with get_records_after
218
+ source_iteration = self.source.get_records_after(
219
+ source_state=source_incremental_state,
220
+ pagination=cursor.pagination,
221
+ )
222
+ else:
223
+ # Use standard fetching with get
224
+ source_iteration = self.source.get(pagination=cursor.pagination)
184
225
  except Exception as e:
185
226
  logger.error(traceback.format_exc())
186
227
  logger.error(
bizon/source/config.py CHANGED
@@ -42,6 +42,12 @@ class SourceConfig(BaseModel, ABC):
42
42
  default=SourceSyncModes.FULL_REFRESH,
43
43
  )
44
44
 
45
+ cursor_field: Optional[str] = Field(
46
+ default=None,
47
+ description="Field name to use for incremental filtering (e.g., 'updated_at', 'modified_at'). "
48
+ "Source will fetch records where this field > last_run timestamp.",
49
+ )
50
+
45
51
  force_ignore_checkpoint: bool = Field(
46
52
  description="Whether to force recreate the sync from iteration 0. Existing checkpoints will be ignored.",
47
53
  default=False,
bizon/source/models.py CHANGED
@@ -44,4 +44,5 @@ class SourceIteration(BaseModel):
44
44
 
45
45
  class SourceIncrementalState(BaseModel):
46
46
  last_run: datetime = Field(..., description="Timestamp of the last successful run")
47
- state: dict = Field(..., description="Incremental state information from the latest sync")
47
+ state: dict = Field(default_factory=dict, description="Incremental state information from the latest sync")
48
+ cursor_field: Optional[str] = Field(default=None, description="The field name to filter records by timestamp")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bizon
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Extract and load your data reliably from API Clients with native fault-tolerant and checkpointing mechanism.
5
5
  Author-email: Antoine Balliet <antoine.balliet@gmail.com>, Anas El Mhamdi <anas.elmhamdi@gmail.com>
6
6
  License-File: LICENSE
@@ -153,6 +153,130 @@ Runner is the interface used by Bizon to run the pipeline. It can be configured
153
153
  - `process` (asynchronous)
154
154
  - `stream` (synchronous)
155
155
 
156
+ ## Sync Modes
157
+
158
+ Bizon supports three sync modes:
159
+ - `full_refresh`: Re-syncs all data from scratch on each run
160
+ - `incremental`: Syncs only new/updated data since the last successful run
161
+ - `stream`: Continuous streaming mode for real-time data (e.g., Kafka)
162
+
163
+ ### Incremental Sync
164
+
165
+ Incremental sync fetches only new or updated records since the last successful run, using an **append-only** strategy.
166
+
167
+ #### Configuration
168
+
169
+ ```yaml
170
+ source:
171
+ name: your_source
172
+ stream: your_stream
173
+ sync_mode: incremental
174
+ cursor_field: updated_at # The timestamp field to filter records by
175
+ ```
176
+
177
+ #### How It Works
178
+
179
+ ```
180
+ ┌─────────────────────────────────────────────────────────────────────┐
181
+ │ INCREMENTAL SYNC FLOW │
182
+ ├─────────────────────────────────────────────────────────────────────┤
183
+ │ │
184
+ │ 1. Producer checks for last successful job │
185
+ │ └─> Backend.get_last_successful_stream_job() │
186
+ │ │
187
+ │ 2. If found, creates SourceIncrementalState: │
188
+ │ └─> last_run = previous_job.created_at │
189
+ │ └─> cursor_field = config.cursor_field (e.g., "updated_at") │
190
+ │ │
191
+ │ 3. Calls source.get_records_after(source_state, pagination) │
192
+ │ └─> Source filters: WHERE cursor_field > last_run │
193
+ │ │
194
+ │ 4. Records written to temp table: {table}_incremental │
195
+ │ │
196
+ │ 5. finalize() appends temp table to main table │
197
+ │ └─> INSERT INTO main_table SELECT * FROM temp_table │
198
+ │ └─> Deletes temp table │
199
+ │ │
200
+ │ FIRST RUN: No previous job → falls back to get() (full refresh) │
201
+ │ │
202
+ └─────────────────────────────────────────────────────────────────────┘
203
+ ```
204
+
205
+ #### Configuration Options
206
+
207
+ | Option | Required | Description | Example |
208
+ |--------|----------|-------------|---------|
209
+ | `sync_mode` | Yes | Set to `incremental` | `incremental` |
210
+ | `cursor_field` | Yes | Timestamp field to filter by | `updated_at`, `last_edited_time`, `modified_at` |
211
+
212
+ #### Supported Sources
213
+
214
+ Sources must implement `get_records_after()` to support incremental sync:
215
+
216
+ | Source | Cursor Field | Notes |
217
+ |--------|--------------|-------|
218
+ | `notion` | `last_edited_time` | Supports `pages`, `databases`, `blocks`, `blocks_markdown` streams |
219
+ | (others) | Varies | Check source docs or implement `get_records_after()` |
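Editor's note: for sources not listed above, a connector needs a `get_records_after()` method with the signature added in this release. A minimal sketch for a hypothetical REST source — the endpoint and the `updated_after` query parameter are made up, while the bizon models match the ones shipped in this diff:

```python
import requests

from bizon.source.models import SourceIncrementalState, SourceIteration, SourceRecord


class MyApiSource:  # in a real connector this subclasses bizon's AbstractSource
    def get_records_after(
        self, source_state: SourceIncrementalState, pagination: dict = None
    ) -> SourceIteration:
        # Ask the API only for records whose cursor field is newer than the last successful run.
        params = {
            "updated_after": source_state.last_run.isoformat(),  # hypothetical query parameter
            "cursor": (pagination or {}).get("cursor"),
        }
        payload = requests.get("https://api.example.com/items", params=params).json()

        records = [SourceRecord(id=item["id"], data=item) for item in payload.get("items", [])]
        next_pagination = {"cursor": payload["next_cursor"]} if payload.get("next_cursor") else {}
        return SourceIteration(records=records, next_pagination=next_pagination)
```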
220
+
221
+ #### Supported Destinations
222
+
223
+ Destinations must implement `finalize()` with incremental logic:
224
+
225
+ | Destination | Support | Notes |
226
+ |-------------|---------|-------|
227
+ | `bigquery` | ✅ | Append-only via temp table |
228
+ | `bigquery_streaming_v2` | ✅ | Append-only via temp table |
229
+ | `file` | ✅ | Appends to existing file |
230
+ | `logger` | ✅ | Logs completion |
231
+
232
+ #### Example: Notion Incremental Sync
233
+
234
+ ```yaml
235
+ name: notion_incremental_sync
236
+
237
+ source:
238
+ name: notion
239
+ stream: blocks_markdown
240
+ sync_mode: incremental
241
+ cursor_field: last_edited_time
242
+ authentication:
243
+ type: api_key
244
+ params:
245
+ token: secret_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
246
+
247
+ database_ids:
248
+ - "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
249
+
250
+ # Optional: filter which pages to sync
251
+ database_filters:
252
+ "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx":
253
+ property: "Status"
254
+ select:
255
+ equals: "Published"
256
+
257
+ destination:
258
+ name: bigquery
259
+ config:
260
+ project_id: my-gcp-project
261
+ dataset_id: notion_data
262
+ dataset_location: US
263
+
264
+ engine:
265
+ backend:
266
+ type: bigquery
267
+ database: my-gcp-project
268
+ schema: bizon_backend
269
+ syncCursorInDBEvery: 2
270
+ ```
271
+
272
+ #### First Run Behavior
273
+
274
+ On the first incremental run (no previous successful job):
275
+ - Falls back to `get()` method (full refresh behavior)
276
+ - All data is fetched and loaded
277
+ - Job is marked as successful
278
+ - Subsequent runs use `get_records_after()` with `last_run` timestamp
279
+
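Editor's note: the incremental state the Producer hands to `get_records_after()` is a plain Pydantic model, so it can also be built by hand, e.g. to exercise a source outside a full pipeline run. A small sketch; the timestamp and cursor field are illustrative:

```python
from datetime import datetime, timezone

from bizon.source.models import SourceIncrementalState

# Mirrors what the Producer builds: last_run comes from the previous successful job's
# created_at, cursor_field from the source config.
state = SourceIncrementalState(
    last_run=datetime(2024, 1, 1, tzinfo=timezone.utc),  # illustrative timestamp
    cursor_field="updated_at",
)

print(state.last_run.isoformat())  # the value a source compares its cursor field against
```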
156
280
  ## Start syncing your data 🚀
157
281
 
158
282
  ### Quick setup without any dependencies ✌️
@@ -13,21 +13,24 @@ bizon/common/models.py,sha256=eL_Ii0CkeJFIjak1CKrB74mbC3OkmWP2uI27ynlYgkQ,10070
13
13
  bizon/common/errors/backoff.py,sha256=z7RkQt1Npdh0sfD3hBDaiWQKe4iqS6ewvT1Q4Fds5aU,508
14
14
  bizon/common/errors/errors.py,sha256=mrYx1uE2kOuR2pEaB7ztK1l2m0E4V-_-hxq-DuILerY,682
15
15
  bizon/connectors/destinations/bigquery/config/bigquery.example.yml,sha256=sy5-Piew00BlcjX5CFayFVrUq9G_vFYWXDmpWi9beTY,1263
16
+ bizon/connectors/destinations/bigquery/config/bigquery_incremental.example.yml,sha256=z0pz4W1x0dlsoAjorYR2DxMjkzTvIWn9tigqtOR8PUY,1076
16
17
  bizon/connectors/destinations/bigquery/src/config.py,sha256=q55zR_9V5-ZZmOmSK7fDOHSzzYhoT-fwlppDzX4he9U,4000
17
- bizon/connectors/destinations/bigquery/src/destination.py,sha256=wJHT9KO5aA8sLEqgVbb9aVCXZ51_5ccgkPtTCXuBp6s,9503
18
+ bizon/connectors/destinations/bigquery/src/destination.py,sha256=awS3dZsSKqLTVnhBKuP_9rXSt3IpGv3c4WjZOCwqu9o,9888
18
19
  bizon/connectors/destinations/bigquery_streaming/config/bigquery_streaming.example.yml,sha256=rF0mQ5IaOe6oqsbVy6q0innn7SXsOoBdBvIN8BTwPVc,1869
19
20
  bizon/connectors/destinations/bigquery_streaming/src/config.py,sha256=LdBKEqHPaGll8PW6c6q_lH7PJvsGdtv2BCrtB-TukTA,1898
20
- bizon/connectors/destinations/bigquery_streaming/src/destination.py,sha256=6PLO0zMbPskwtaeKfnOvu5Ls0Z-gl11uXHMCPIEdHmc,16043
21
+ bizon/connectors/destinations/bigquery_streaming/src/destination.py,sha256=Uyne57NoT-z9uk7Yi4EgOUFYQ4QlvXDLFxgZC5KyCFE,14222
21
22
  bizon/connectors/destinations/bigquery_streaming_v2/config/bigquery_streaming_v2.example.yml,sha256=hIQXlXtiBT8DgMVAs0x_h-19xoLkjHr-Ko7oSn8jnc0,2023
22
23
  bizon/connectors/destinations/bigquery_streaming_v2/src/config.py,sha256=cdHST5Vx1VQbLsIVsPkoEtOJKmbA35XjsKzj6fZ5DHw,1907
23
- bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py,sha256=GSByVunYPXqaVbPbQGDKJX3b4ngUenHbvdJKIlb95a8,18680
24
+ bizon/connectors/destinations/bigquery_streaming_v2/src/destination.py,sha256=5aXEsbzyWKzS2F1pFMZ8pdbJaXmdGTaIrwgl2cd1IbU,19026
24
25
  bizon/connectors/destinations/bigquery_streaming_v2/src/proto_utils.py,sha256=aWYVzMPMTgsdDapYniu8h6Tf2Pty4fDisT_33d9yEJ4,3692
25
26
  bizon/connectors/destinations/file/config/file.example.yml,sha256=sMeX92hTrTQUrLmQgQFsq5OdG5Dk3BbpDo0NhRbBahI,986
27
+ bizon/connectors/destinations/file/config/file_incremental.example.yml,sha256=Xh5KwWiQRuq_MnMgOCHiHqIwHjOjXbwQlVlVcKdXARA,620
26
28
  bizon/connectors/destinations/file/src/config.py,sha256=dU64aFe7J63aBGh6Os8mXl2kvECj3s4pPC7H3EmOvb8,585
27
- bizon/connectors/destinations/file/src/destination.py,sha256=iVmFjLjjuGRD6jbivOUqSlYxtIIMz9buB5fvXpYMsYA,1827
29
+ bizon/connectors/destinations/file/src/destination.py,sha256=RQEL0Z5l409S319fAJyvW8cDblUCVAxPhALJVhjQKDM,4253
28
30
  bizon/connectors/destinations/logger/config/logger.example.yml,sha256=KtQRmqqFeziJtBZ7vzrXGQLdTgWZNjxx2sdFXpIgIp4,672
31
+ bizon/connectors/destinations/logger/config/logger_incremental.example.yml,sha256=rwTLlXib-Jo3b4-_NcFv2ShdPC73WEpiiX3apP3sKg0,541
29
32
  bizon/connectors/destinations/logger/src/config.py,sha256=vIV_G0k9c8DPcDxU6CGvEOL2zAEvAmKZcx3RV0eRi7A,426
30
- bizon/connectors/destinations/logger/src/destination.py,sha256=-KosqybNiJq3-mlrrxa0cSUdwmaDcelfeRQcytbfjBQ,1226
33
+ bizon/connectors/destinations/logger/src/destination.py,sha256=YUC_lAN5nrcrNAN90hnalKFAKX49KTDlJwdLfwTaC0U,2007
31
34
  bizon/connectors/sources/cycle/config/cycle.example.yml,sha256=UDiqOa-8ZsykmNT625kxq9tyXOj_gKe9CFwg9r_8SYk,230
32
35
  bizon/connectors/sources/cycle/src/source.py,sha256=6sXMneq59XZAT5oJseM9k6sGJaoQw4NDp8FTtg8lPhk,4213
33
36
  bizon/connectors/sources/cycle/tests/cycle_customers.py,sha256=A48S20LxIC0A74JLoFn4NTHNTgBWV_5stTFtF1Gfk2c,271
@@ -43,9 +46,11 @@ bizon/connectors/sources/dummy/tests/dummy_pipeline_write_data_bigquery.py,sha25
43
46
  bizon/connectors/sources/dummy/tests/dummy_pipeline_write_data_bigquery_through_kafka.py,sha256=PFUhDuFw1Q1AMNMsnXPQxoqHIWf_wHEL1hLQodYlLcQ,596
44
47
  bizon/connectors/sources/gsheets/config/default_auth.example.yml,sha256=KOBp6MfO4uJwpwEYW0tJ4X5ctVwwdur9poJB4Ohba6s,348
45
48
  bizon/connectors/sources/gsheets/config/service_account.example.yml,sha256=XxVUnk9gGWc3lDb8CnzTHjTu8xz4Asyr5tXzY6qLvPg,1081
49
+ bizon/connectors/sources/gsheets/config/service_account_incremental.example.yml,sha256=WGvAtw4aOwSMWrSZW0tHaRncZnGbI6gd4LJk1aHIP_c,1765
46
50
  bizon/connectors/sources/gsheets/src/source.py,sha256=xNF5FR9QLTM4kCiZ2eKZ5CZWNhLw6tyLaJZbliNzYnY,5675
47
51
  bizon/connectors/sources/gsheets/tests/gsheets_pipeline.py,sha256=lNSM3kZTd4W_-ajGIO3mdp8qGdEbnmWqsMm5pRiS0cw,181
48
52
  bizon/connectors/sources/hubspot/config/api_key.example.yml,sha256=VDTRloE5caqAdGdXgvsJZ6nQT46JHzX_YboxeGbpP18,389
53
+ bizon/connectors/sources/hubspot/config/api_key_incremental.example.yml,sha256=g4SBeVEXSr3tCgy5VjgZPWkhnuvEZ0jl5nPNn3u05Jc,920
49
54
  bizon/connectors/sources/hubspot/config/oauth.example.yml,sha256=YqBtj1IxIsdM9E85_4eVWl6mPiHsQNoQn41EzCqORy0,499
50
55
  bizon/connectors/sources/hubspot/src/hubspot_base.py,sha256=THo8ImrPrIxeTuFcBMRJYwaDMstIfLIGjrQLE2cqqsU,3424
51
56
  bizon/connectors/sources/hubspot/src/hubspot_objects.py,sha256=ykqvxaFihv0e0A3-gGDmentp1KCGCoYvvDwZ3CcHzNg,6301
@@ -60,9 +65,10 @@ bizon/connectors/sources/kafka/src/decode.py,sha256=RhPjazRQHb72D9iBhb763Nje7SH9
60
65
  bizon/connectors/sources/kafka/src/source.py,sha256=0Hv6viyVZGAd4azhQnqCteyHuwsbbDL4rSGEjMCff9E,19722
61
66
  bizon/connectors/sources/kafka/tests/kafka_pipeline.py,sha256=9LaCqXJIEx2ye3dkWq0YK_bPX7d4fCX_OcDOJCk34WE,206
62
67
  bizon/connectors/sources/notion/config/api_key.example.yml,sha256=TagqOqaho4u_G5ZP4L8je89Y4G_NvCo8s4Wf9e8yVH8,1061
68
+ bizon/connectors/sources/notion/config/api_key_incremental.example.yml,sha256=52uQJo-SrqFny00zIVbA86qVq3asYHMFALqBcdmPmc8,1499
63
69
  bizon/connectors/sources/notion/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
70
  bizon/connectors/sources/notion/src/config.py,sha256=L-FZWijUa-aWK9VenWGsl6mv40i4ww46FacjYoX9gXo,1886
65
- bizon/connectors/sources/notion/src/source.py,sha256=QG0z6uCRpIKa-BI7NfLUubb_p7_-z6WSWr8gpjWNHfY,50548
71
+ bizon/connectors/sources/notion/src/source.py,sha256=aViwfLuBzsNGZHwU4-z-xI40cROJTvx7Tlkw3ApF3q8,66217
66
72
  bizon/connectors/sources/notion/tests/notion_pipeline.py,sha256=lyiD9b5uUF3oih8vY4gk7QXnfySGSawnbrBuSdTLym8,200
67
73
  bizon/connectors/sources/notion/tests/test_notion.py,sha256=-G0DbTLDS2Gc_Bx8xR2VXnY89vW64s1-puwPc9x2N7A,4029
68
74
  bizon/connectors/sources/periscope/config/periscope_charts.example.yml,sha256=9OgFDB7vguiNz2F2fmRqDNV8S_ddO9ncN5hgW9MhME4,350
@@ -88,7 +94,7 @@ bizon/engine/backend/adapters/sqlalchemy/backend.py,sha256=ipJ7eY_iiqjrvtq4NS39C
88
94
  bizon/engine/backend/adapters/sqlalchemy/config.py,sha256=CeTWncVK27Y6lEKMVCF5RxD8Illhx2IQqqFkGrf0WKA,1845
89
95
  bizon/engine/pipeline/consumer.py,sha256=DtCR3mG791h35poYJdXjL9geNO-GWPKl_YC0zPsF5qI,3207
90
96
  bizon/engine/pipeline/models.py,sha256=qOra2MJGN6-PuouKpKuZRjutnQmzom0mgWDFZ16LcM8,405
91
- bizon/engine/pipeline/producer.py,sha256=8e7cKcZh3_Irz4ceb3NzIDD8X915U26eGONqgNiYpKQ,10017
97
+ bizon/engine/pipeline/producer.py,sha256=XV2fR6CNMRlbYwqTl9mlqy6nkG37ODyh2aiiTZ371VM,11995
92
98
  bizon/engine/queue/config.py,sha256=0XwiQSB2OKTs-rODCSZqT5txNZzGOic2-PvODbcSrGg,1267
93
99
  bizon/engine/queue/queue.py,sha256=Y9uj31d-ZgW2f0F02iccp_o-m-RoMm_jR61NkLdMQ2M,3461
94
100
  bizon/engine/queue/adapters/kafka/config.py,sha256=ndNEXRT-nIgyWgoqlNXFhmlN206v87GobXIW9Z0zrSA,1085
@@ -113,10 +119,10 @@ bizon/monitoring/datadog/monitor.py,sha256=YSdyMVEIjkDyp91_mGED_kx8j76MbQyQGkGJC
113
119
  bizon/monitoring/noop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
120
  bizon/monitoring/noop/monitor.py,sha256=Pu7Qt9SpUG1UvC8aWysgtoDY-t5tnKd4FlUXAC4MjbI,1066
115
121
  bizon/source/callback.py,sha256=lfTwU_bzJwR0q5sbiKoK8uedQ-dhfHzoYkPVqm8b_Ho,602
116
- bizon/source/config.py,sha256=0wQiX8VJJq9w0hALJkqIxMH4Wa760LhsieCy2VmUcfo,2223
122
+ bizon/source/config.py,sha256=JyZbKjlU0xhiyuuIGJYJPGUl9JxS4xyGeCyHoHgHHos,2473
117
123
  bizon/source/cursor.py,sha256=Wjh9eNEiHV5P9YnjS5bdS2ahyFc0gPm9QLQtD-QjQCI,4089
118
124
  bizon/source/discover.py,sha256=h9IVqtAQsTH-XxR-UkAFgNvEphLP2LgataQCCuHbGrk,11174
119
- bizon/source/models.py,sha256=cncnIgZF-kQM0e08trvNgOHQ6AnQK8ko3GzgOGuWwgE,1705
125
+ bizon/source/models.py,sha256=CHPKvO9chRi85WPDfLYy9vWnPsua8LTwYvjjN7Dj2uA,1837
120
126
  bizon/source/session.py,sha256=klbCv0g6sm6ac-pzM50eAJSP8DdQ9DOegHgjpmKKUrI,1978
121
127
  bizon/source/source.py,sha256=k_fHOOvam5ixZ9oPuQzUa9Kq3jVvv2HY7ghrCo-0o3I,4342
122
128
  bizon/source/auth/builder.py,sha256=hc4zBNj31LZc-QqgIyx1VQEYTm9Xv81vY5pJiwQroJo,860
@@ -129,8 +135,8 @@ bizon/source/auth/authenticators/oauth.py,sha256=tY_UZsWTy4FkifqJ7-smPaD61gg1dMJ
129
135
  bizon/source/auth/authenticators/token.py,sha256=P6SKRAarAEv28YiWp8hQLSKAV7twNlyNTGRr9sxlx58,956
130
136
  bizon/transform/config.py,sha256=Q9F7jlsuaXK8OYrO5qcdk8lxXTDoIgzoVMhhHW3igEw,213
131
137
  bizon/transform/transform.py,sha256=Ufla8YFx9C9WEiN0ppmZS1a86Sk0PgggqC-8DIvDeAQ,1414
132
- bizon-0.2.0.dist-info/METADATA,sha256=ArxHLLJlMkVTo6mYoaeiLco_inGdcjPivIB9gOki-QA,6322
133
- bizon-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
134
- bizon-0.2.0.dist-info/entry_points.txt,sha256=hHZPN-V6JwwhSYWNCKVu3WNxekuhXtIAaz_zdwO7NDo,45
135
- bizon-0.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
136
- bizon-0.2.0.dist-info/RECORD,,
138
+ bizon-0.3.0.dist-info/METADATA,sha256=oX7OZjHhKAVvQ8UiRS0ksqu3C65t2kOp2mAfXoEBdJY,11159
139
+ bizon-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
140
+ bizon-0.3.0.dist-info/entry_points.txt,sha256=hHZPN-V6JwwhSYWNCKVu3WNxekuhXtIAaz_zdwO7NDo,45
141
+ bizon-0.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
142
+ bizon-0.3.0.dist-info/RECORD,,