pirn-core 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pirn/__init__.py +92 -0
- pirn/_domain_discovery.py +83 -0
- pirn/_migrate/__init__.py +13 -0
- pirn/_migrate/import_rewriter.py +110 -0
- pirn/_migrate/main.py +76 -0
- pirn/backends/AGENTIC_USE.md +168 -0
- pirn/backends/__init__.py +12 -0
- pirn/backends/_signer.py +92 -0
- pirn/backends/azure.py +95 -0
- pirn/backends/base/__init__.py +0 -0
- pirn/backends/base/_cloud_object_store.py +226 -0
- pirn/backends/base/data_store.py +52 -0
- pirn/backends/base/run_history.py +118 -0
- pirn/backends/base/subscribable_store.py +44 -0
- pirn/backends/base/tapestry_snapshot.py +16 -0
- pirn/backends/base/tapestry_store.py +51 -0
- pirn/backends/disk.py +145 -0
- pirn/backends/duckdb.py +305 -0
- pirn/backends/gcs.py +89 -0
- pirn/backends/in_memory/__init__.py +0 -0
- pirn/backends/in_memory/in_memory_data_store.py +66 -0
- pirn/backends/in_memory/in_memory_history.py +140 -0
- pirn/backends/in_memory/in_memory_store.py +110 -0
- pirn/backends/postgres/__init__.py +4 -0
- pirn/backends/postgres/_lazy_pool.py +72 -0
- pirn/backends/postgres/postgres_history.py +362 -0
- pirn/backends/postgres/postgres_store.py +297 -0
- pirn/backends/s3.py +178 -0
- pirn/backends/sqlite/__init__.py +4 -0
- pirn/backends/sqlite/_migrations.py +30 -0
- pirn/backends/sqlite/sqlite_history.py +382 -0
- pirn/backends/sqlite/sqlite_store.py +151 -0
- pirn/backends/valkey/__init__.py +4 -0
- pirn/backends/valkey/_lazy_client.py +57 -0
- pirn/backends/valkey/valkey_data_store.py +159 -0
- pirn/backends/valkey/valkey_store.py +249 -0
- pirn/check/__init__.py +0 -0
- pirn/check/_loader.py +31 -0
- pirn/check/main.py +44 -0
- pirn/check/validation_issue.py +14 -0
- pirn/check/validation_result.py +22 -0
- pirn/check/validator.py +86 -0
- pirn/connectors/AGENTIC_USE.md +171 -0
- pirn/connectors/__init__.py +17 -0
- pirn/connectors/api_client.py +93 -0
- pirn/connectors/bi_catalog/AGENTIC_USE.md +107 -0
- pirn/connectors/bi_catalog/__init__.py +1 -0
- pirn/connectors/bi_catalog/airbyte_client.py +178 -0
- pirn/connectors/bi_catalog/airbyte_config.py +41 -0
- pirn/connectors/bi_catalog/alation_client.py +215 -0
- pirn/connectors/bi_catalog/alation_config.py +36 -0
- pirn/connectors/bi_catalog/datahub_client.py +212 -0
- pirn/connectors/bi_catalog/datahub_config.py +28 -0
- pirn/connectors/bi_catalog/dbt_artifacts_config.py +24 -0
- pirn/connectors/bi_catalog/dbt_artifacts_reader.py +184 -0
- pirn/connectors/bi_catalog/fivetran_client.py +172 -0
- pirn/connectors/bi_catalog/fivetran_config.py +33 -0
- pirn/connectors/bi_catalog/open_metadata_client.py +219 -0
- pirn/connectors/bi_catalog/open_metadata_config.py +28 -0
- pirn/connectors/capabilities/__init__.py +8 -0
- pirn/connectors/capabilities/event_emitter.py +40 -0
- pirn/connectors/capabilities/metadata_catalog.py +36 -0
- pirn/connectors/capabilities/metric_query.py +33 -0
- pirn/connectors/capabilities/record_writer.py +26 -0
- pirn/connectors/capabilities/table_source.py +47 -0
- pirn/connectors/connection_config.py +101 -0
- pirn/connectors/connection_config_decorator.py +52 -0
- pirn/connectors/database_connection_pool.py +110 -0
- pirn/connectors/databases/AGENTIC_USE.md +102 -0
- pirn/connectors/databases/__init__.py +0 -0
- pirn/connectors/databases/_bigquery_stub_job_config.py +18 -0
- pirn/connectors/databases/bigquery_config.py +34 -0
- pirn/connectors/databases/bigquery_pool.py +196 -0
- pirn/connectors/databases/clickhouse_config.py +37 -0
- pirn/connectors/databases/clickhouse_pool.py +175 -0
- pirn/connectors/databases/databricks_config.py +35 -0
- pirn/connectors/databases/databricks_pool.py +153 -0
- pirn/connectors/databases/dremio_config.py +41 -0
- pirn/connectors/databases/dremio_pool.py +134 -0
- pirn/connectors/databases/duckdb_config.py +60 -0
- pirn/connectors/databases/duckdb_pool.py +89 -0
- pirn/connectors/databases/mssql_config.py +51 -0
- pirn/connectors/databases/mssql_pool.py +163 -0
- pirn/connectors/databases/mysql_config.py +38 -0
- pirn/connectors/databases/mysql_pool.py +185 -0
- pirn/connectors/databases/oracle_config.py +35 -0
- pirn/connectors/databases/oracle_pool.py +166 -0
- pirn/connectors/databases/postgres_config.py +30 -0
- pirn/connectors/databases/postgres_pool.py +107 -0
- pirn/connectors/databases/redshift_config.py +31 -0
- pirn/connectors/databases/redshift_pool.py +111 -0
- pirn/connectors/databases/snowflake_config.py +33 -0
- pirn/connectors/databases/snowflake_pool.py +156 -0
- pirn/connectors/databases/sqlite_config.py +56 -0
- pirn/connectors/databases/sqlite_pool.py +105 -0
- pirn/connectors/document/AGENTIC_USE.md +94 -0
- pirn/connectors/document/__init__.py +0 -0
- pirn/connectors/document/arangodb_config.py +22 -0
- pirn/connectors/document/arangodb_pool.py +101 -0
- pirn/connectors/document/cosmosdb_config.py +29 -0
- pirn/connectors/document/cosmosdb_pool.py +113 -0
- pirn/connectors/document/couchbase_config.py +31 -0
- pirn/connectors/document/couchbase_pool.py +107 -0
- pirn/connectors/document/couchdb_config.py +21 -0
- pirn/connectors/document/couchdb_pool.py +120 -0
- pirn/connectors/document/firestore_config.py +27 -0
- pirn/connectors/document/firestore_pool.py +134 -0
- pirn/connectors/document/mongodb_config.py +30 -0
- pirn/connectors/document/mongodb_pool.py +121 -0
- pirn/connectors/dsn_scrubber.py +40 -0
- pirn/connectors/file_format.py +70 -0
- pirn/connectors/file_formats/AGENTIC_USE.md +228 -0
- pirn/connectors/file_formats/__init__.py +10 -0
- pirn/connectors/file_formats/_html_stripper.py +33 -0
- pirn/connectors/file_formats/_sam_utils.py +183 -0
- pirn/connectors/file_formats/aac_format.py +76 -0
- pirn/connectors/file_formats/archive_file_format.py +276 -0
- pirn/connectors/file_formats/arrow_ipc_format.py +101 -0
- pirn/connectors/file_formats/asdf_format.py +88 -0
- pirn/connectors/file_formats/avro_format.py +106 -0
- pirn/connectors/file_formats/bam_format.py +107 -0
- pirn/connectors/file_formats/batch_file_format.py +51 -0
- pirn/connectors/file_formats/bcf_format.py +233 -0
- pirn/connectors/file_formats/bdf_format.py +203 -0
- pirn/connectors/file_formats/bids_dataset_format.py +136 -0
- pirn/connectors/file_formats/brainvision_format.py +339 -0
- pirn/connectors/file_formats/cda_xml_format.py +179 -0
- pirn/connectors/file_formats/codec.py +28 -0
- pirn/connectors/file_formats/codecs/__init__.py +8 -0
- pirn/connectors/file_formats/codecs/bzip2_codec.py +49 -0
- pirn/connectors/file_formats/codecs/gzip_codec.py +52 -0
- pirn/connectors/file_formats/codecs/lz4_codec.py +53 -0
- pirn/connectors/file_formats/codecs/snappy_codec.py +56 -0
- pirn/connectors/file_formats/codecs/zstd_codec.py +68 -0
- pirn/connectors/file_formats/compressed_file_format.py +113 -0
- pirn/connectors/file_formats/cram_format.py +127 -0
- pirn/connectors/file_formats/csv_format.py +167 -0
- pirn/connectors/file_formats/define_xml_format.py +136 -0
- pirn/connectors/file_formats/dicom_format.py +294 -0
- pirn/connectors/file_formats/dlis_format.py +79 -0
- pirn/connectors/file_formats/docx_format.py +98 -0
- pirn/connectors/file_formats/edf_format.py +236 -0
- pirn/connectors/file_formats/edf_plus_format.py +98 -0
- pirn/connectors/file_formats/epub_format.py +200 -0
- pirn/connectors/file_formats/fasta_format.py +171 -0
- pirn/connectors/file_formats/fastq_format.py +161 -0
- pirn/connectors/file_formats/feather_format.py +76 -0
- pirn/connectors/file_formats/fhir_json_format.py +119 -0
- pirn/connectors/file_formats/fhir_xml_format.py +196 -0
- pirn/connectors/file_formats/fits_format.py +117 -0
- pirn/connectors/file_formats/flac_format.py +95 -0
- pirn/connectors/file_formats/geojson_format.py +144 -0
- pirn/connectors/file_formats/geopackage_format.py +201 -0
- pirn/connectors/file_formats/geotiff_format.py +208 -0
- pirn/connectors/file_formats/gguf_format.py +188 -0
- pirn/connectors/file_formats/grib_format.py +131 -0
- pirn/connectors/file_formats/hdf5_format.py +212 -0
- pirn/connectors/file_formats/heic_format.py +130 -0
- pirn/connectors/file_formats/hl7v2_format.py +151 -0
- pirn/connectors/file_formats/html_format.py +154 -0
- pirn/connectors/file_formats/joblib_format.py +128 -0
- pirn/connectors/file_formats/jpeg_format.py +121 -0
- pirn/connectors/file_formats/json_format.py +114 -0
- pirn/connectors/file_formats/jsonl_format.py +85 -0
- pirn/connectors/file_formats/kml_format.py +191 -0
- pirn/connectors/file_formats/las_format.py +106 -0
- pirn/connectors/file_formats/m4a_format.py +75 -0
- pirn/connectors/file_formats/markdown_format.py +240 -0
- pirn/connectors/file_formats/matlab_mat_format.py +211 -0
- pirn/connectors/file_formats/mp3_format.py +76 -0
- pirn/connectors/file_formats/mzml_format.py +201 -0
- pirn/connectors/file_formats/netcdf4_format.py +169 -0
- pirn/connectors/file_formats/netcdf_format.py +270 -0
- pirn/connectors/file_formats/nifti_format.py +99 -0
- pirn/connectors/file_formats/numpy_npy_format.py +207 -0
- pirn/connectors/file_formats/numpy_npz_format.py +193 -0
- pirn/connectors/file_formats/ods_format.py +195 -0
- pirn/connectors/file_formats/ogg_format.py +94 -0
- pirn/connectors/file_formats/onnx_format.py +107 -0
- pirn/connectors/file_formats/open_slide_format.py +145 -0
- pirn/connectors/file_formats/orc_format.py +77 -0
- pirn/connectors/file_formats/parquet_format.py +119 -0
- pirn/connectors/file_formats/pdf_format.py +139 -0
- pirn/connectors/file_formats/plain_text_format.py +220 -0
- pirn/connectors/file_formats/png_format.py +106 -0
- pirn/connectors/file_formats/pptx_format.py +145 -0
- pirn/connectors/file_formats/prodml_format.py +122 -0
- pirn/connectors/file_formats/pytorch_format.py +152 -0
- pirn/connectors/file_formats/resqml_format.py +124 -0
- pirn/connectors/file_formats/root_format.py +109 -0
- pirn/connectors/file_formats/rtf_format.py +115 -0
- pirn/connectors/file_formats/safetensors_format.py +185 -0
- pirn/connectors/file_formats/sam_format.py +110 -0
- pirn/connectors/file_formats/sdtm_xpt_format.py +143 -0
- pirn/connectors/file_formats/segd_format.py +89 -0
- pirn/connectors/file_formats/segy_format.py +141 -0
- pirn/connectors/file_formats/shapefile_format.py +162 -0
- pirn/connectors/file_formats/streaming_file_format.py +22 -0
- pirn/connectors/file_formats/tf_saved_model_format.py +136 -0
- pirn/connectors/file_formats/tflite_format.py +133 -0
- pirn/connectors/file_formats/tiff_format.py +174 -0
- pirn/connectors/file_formats/tsv_format.py +43 -0
- pirn/connectors/file_formats/vcf_format.py +231 -0
- pirn/connectors/file_formats/wav_format.py +75 -0
- pirn/connectors/file_formats/webp_format.py +135 -0
- pirn/connectors/file_formats/witsml_format.py +131 -0
- pirn/connectors/file_formats/xlsx_format.py +168 -0
- pirn/connectors/file_formats/zarr_format.py +258 -0
- pirn/connectors/graph/AGENTIC_USE.md +72 -0
- pirn/connectors/graph/__init__.py +0 -0
- pirn/connectors/graph/memgraph_config.py +23 -0
- pirn/connectors/graph/memgraph_pool.py +92 -0
- pirn/connectors/graph/neo4j_config.py +23 -0
- pirn/connectors/graph/neo4j_pool.py +114 -0
- pirn/connectors/graph/orientdb_config.py +23 -0
- pirn/connectors/graph/orientdb_pool.py +101 -0
- pirn/connectors/knots/__init__.py +0 -0
- pirn/connectors/knots/database_connection_pool_knot.py +33 -0
- pirn/connectors/knots/database_execute_sink.py +90 -0
- pirn/connectors/knots/database_query_source.py +86 -0
- pirn/connectors/knots/message_broker_knot.py +31 -0
- pirn/connectors/knots/message_broker_publish_sink.py +89 -0
- pirn/connectors/knots/object_store_knot.py +31 -0
- pirn/connectors/knots/object_store_list_source.py +61 -0
- pirn/connectors/knots/object_store_read_source.py +73 -0
- pirn/connectors/knots/object_store_write_sink.py +59 -0
- pirn/connectors/message_broker.py +54 -0
- pirn/connectors/messaging/AGENTIC_USE.md +91 -0
- pirn/connectors/messaging/__init__.py +0 -0
- pirn/connectors/messaging/discord_client.py +158 -0
- pirn/connectors/messaging/discord_config.py +33 -0
- pirn/connectors/messaging/google_chat_client.py +114 -0
- pirn/connectors/messaging/google_chat_config.py +26 -0
- pirn/connectors/messaging/pagerduty_client.py +206 -0
- pirn/connectors/messaging/pagerduty_config.py +32 -0
- pirn/connectors/messaging/slack_client.py +139 -0
- pirn/connectors/messaging/slack_config.py +33 -0
- pirn/connectors/messaging/teams_client.py +145 -0
- pirn/connectors/messaging/teams_config.py +26 -0
- pirn/connectors/messaging/telegram_client.py +146 -0
- pirn/connectors/messaging/telegram_config.py +42 -0
- pirn/connectors/object_storage/AGENTIC_USE.md +93 -0
- pirn/connectors/object_storage/__init__.py +0 -0
- pirn/connectors/object_storage/azure_blob_config.py +44 -0
- pirn/connectors/object_storage/azure_blob_store.py +146 -0
- pirn/connectors/object_storage/gcs_config.py +34 -0
- pirn/connectors/object_storage/gcs_store.py +132 -0
- pirn/connectors/object_storage/hdfs_config.py +39 -0
- pirn/connectors/object_storage/hdfs_store.py +250 -0
- pirn/connectors/object_storage/local_filesystem_config.py +32 -0
- pirn/connectors/object_storage/local_filesystem_store.py +118 -0
- pirn/connectors/object_storage/s3_config.py +46 -0
- pirn/connectors/object_storage/s3_store.py +132 -0
- pirn/connectors/object_store.py +64 -0
- pirn/connectors/observability/AGENTIC_USE.md +106 -0
- pirn/connectors/observability/__init__.py +1 -0
- pirn/connectors/observability/datadog_client.py +262 -0
- pirn/connectors/observability/datadog_config.py +31 -0
- pirn/connectors/observability/grafana_client.py +262 -0
- pirn/connectors/observability/grafana_config.py +27 -0
- pirn/connectors/observability/opentelemetry_config.py +41 -0
- pirn/connectors/observability/opentelemetry_span_emitter.py +122 -0
- pirn/connectors/observability/prometheus_client.py +193 -0
- pirn/connectors/observability/prometheus_config.py +28 -0
- pirn/connectors/saas/AGENTIC_USE.md +113 -0
- pirn/connectors/saas/__init__.py +1 -0
- pirn/connectors/saas/airtable_client.py +219 -0
- pirn/connectors/saas/airtable_config.py +35 -0
- pirn/connectors/saas/amplitude_client.py +172 -0
- pirn/connectors/saas/amplitude_config.py +26 -0
- pirn/connectors/saas/github_client.py +212 -0
- pirn/connectors/saas/github_config.py +35 -0
- pirn/connectors/saas/google_analytics_client.py +202 -0
- pirn/connectors/saas/google_analytics_config.py +30 -0
- pirn/connectors/saas/hubspot_client.py +192 -0
- pirn/connectors/saas/hubspot_config.py +29 -0
- pirn/connectors/saas/jira_client.py +206 -0
- pirn/connectors/saas/jira_config.py +34 -0
- pirn/connectors/saas/mixpanel_client.py +155 -0
- pirn/connectors/saas/mixpanel_config.py +35 -0
- pirn/connectors/saas/salesforce_client.py +242 -0
- pirn/connectors/saas/salesforce_config.py +36 -0
- pirn/connectors/saas/shopify_client.py +278 -0
- pirn/connectors/saas/shopify_config.py +29 -0
- pirn/connectors/saas/stripe_client.py +189 -0
- pirn/connectors/saas/stripe_config.py +27 -0
- pirn/connectors/saas/twilio_client.py +181 -0
- pirn/connectors/saas/twilio_config.py +30 -0
- pirn/connectors/saas/zendesk_client.py +221 -0
- pirn/connectors/saas/zendesk_config.py +33 -0
- pirn/connectors/streaming/AGENTIC_USE.md +100 -0
- pirn/connectors/streaming/__init__.py +0 -0
- pirn/connectors/streaming/azure_servicebus_broker.py +198 -0
- pirn/connectors/streaming/azure_servicebus_config.py +18 -0
- pirn/connectors/streaming/azure_servicebus_stub_message.py +25 -0
- pirn/connectors/streaming/kafka_broker.py +144 -0
- pirn/connectors/streaming/kafka_config.py +27 -0
- pirn/connectors/streaming/kinesis_broker.py +150 -0
- pirn/connectors/streaming/kinesis_config.py +26 -0
- pirn/connectors/streaming/pubsub_broker.py +211 -0
- pirn/connectors/streaming/pubsub_config.py +18 -0
- pirn/connectors/streaming/rabbitmq_broker.py +163 -0
- pirn/connectors/streaming/rabbitmq_config.py +22 -0
- pirn/connectors/streaming/rabbitmq_plain_message.py +23 -0
- pirn/connectors/streaming/valkey_record.py +52 -0
- pirn/connectors/streaming/valkey_stream_broker.py +128 -0
- pirn/connectors/streaming/valkey_stream_config.py +24 -0
- pirn/connectors/timeseries/AGENTIC_USE.md +109 -0
- pirn/connectors/timeseries/__init__.py +0 -0
- pirn/connectors/timeseries/influxdb_config.py +32 -0
- pirn/connectors/timeseries/influxdb_pool.py +124 -0
- pirn/connectors/timeseries/kdb_config.py +26 -0
- pirn/connectors/timeseries/kdb_pool.py +151 -0
- pirn/connectors/timeseries/questdb_config.py +32 -0
- pirn/connectors/timeseries/questdb_pool.py +94 -0
- pirn/connectors/timeseries/timescaledb_config.py +31 -0
- pirn/connectors/timeseries/timescaledb_pool.py +111 -0
- pirn/connectors/timeseries/victoriametrics_config.py +26 -0
- pirn/connectors/timeseries/victoriametrics_pool.py +129 -0
- pirn/connectors/transports/__init__.py +1 -0
- pirn/connectors/transports/object_store_transport.py +170 -0
- pirn/connectors/transports/valkey_transport.py +227 -0
- pirn/core/__init__.py +1 -0
- pirn/core/assembler.py +39 -0
- pirn/core/disassembler.py +43 -0
- pirn/core/err.py +46 -0
- pirn/core/error_policy.py +19 -0
- pirn/core/hashing.py +159 -0
- pirn/core/identity/__init__.py +15 -0
- pirn/core/identity/chained_identity_resolver.py +20 -0
- pirn/core/identity/env_identity_resolver.py +35 -0
- pirn/core/identity/identity_resolver.py +15 -0
- pirn/core/identity/null_identity_resolver.py +10 -0
- pirn/core/identity/os_identity_resolver.py +12 -0
- pirn/core/identity/static_identity_resolver.py +16 -0
- pirn/core/knot.py +611 -0
- pirn/core/knot_config.py +95 -0
- pirn/core/knot_factory.py +108 -0
- pirn/core/knot_source.py +86 -0
- pirn/core/lineage.py +133 -0
- pirn/core/ok.py +38 -0
- pirn/core/optional.py +217 -0
- pirn/core/parameter.py +173 -0
- pirn/core/parameter_spec.py +19 -0
- pirn/core/payload.py +62 -0
- pirn/core/pirn_opaque_value.py +85 -0
- pirn/core/providers/__init__.py +0 -0
- pirn/core/providers/embedding_provider.py +29 -0
- pirn/core/providers/llm_provider.py +66 -0
- pirn/core/result.py +19 -0
- pirn/core/run_context.py +112 -0
- pirn/core/run_request.py +47 -0
- pirn/core/run_result.py +120 -0
- pirn/core/sentinels/__init__.py +0 -0
- pirn/core/sentinels/_unset.py +9 -0
- pirn/core/skipped.py +33 -0
- pirn/core/transport/__init__.py +1 -0
- pirn/core/transport/data_transport.py +116 -0
- pirn/core/transport/dual_write_transport.py +109 -0
- pirn/core/transport/filesystem_transport.py +276 -0
- pirn/core/transport/inline_transport.py +87 -0
- pirn/core/transport/serializers/__init__.py +1 -0
- pirn/core/transport/serializers/numpy_serializer.py +53 -0
- pirn/core/transport/serializers/pickle_serializer.py +39 -0
- pirn/core/transport/serializers/serialiser_error.py +8 -0
- pirn/core/transport/serializers/serializer.py +46 -0
- pirn/core/transport/serializers/serializer_registry.py +91 -0
- pirn/core/transport/smart_transport.py +131 -0
- pirn/core/transport/transport_error.py +8 -0
- pirn/core/transport/transport_handle.py +49 -0
- pirn/domains/__init__.py +44 -0
- pirn/domains/_domain_compat_finder.py +143 -0
- pirn/domains/_domain_compat_loader.py +29 -0
- pirn/emitters/AGENTIC_USE.md +107 -0
- pirn/emitters/__init__.py +14 -0
- pirn/emitters/base.py +70 -0
- pirn/emitters/emitter_error_policy.py +16 -0
- pirn/emitters/kafka.py +147 -0
- pirn/emitters/log.py +118 -0
- pirn/emitters/otel.py +160 -0
- pirn/emitters/valkey.py +102 -0
- pirn/emitters/webhook.py +208 -0
- pirn/engine/__init__.py +1 -0
- pirn/engine/_emitter_subscriber.py +25 -0
- pirn/engine/dispatchers/AGENTIC_USE.md +134 -0
- pirn/engine/dispatchers/__init__.py +0 -0
- pirn/engine/dispatchers/celery_dispatcher.py +125 -0
- pirn/engine/dispatchers/dask_dispatcher.py +101 -0
- pirn/engine/dispatchers/dispatcher.py +22 -0
- pirn/engine/dispatchers/local_dispatcher.py +25 -0
- pirn/engine/dispatchers/ray_dispatcher.py +95 -0
- pirn/engine/dispatchers/thread_dispatcher.py +45 -0
- pirn/engine/engine.py +655 -0
- pirn/engine/shed/__init__.py +0 -0
- pirn/engine/shed/edge.py +13 -0
- pirn/engine/shed/shed.py +195 -0
- pirn/engine/shed/shed_error.py +6 -0
- pirn/exceptions/__init__.py +0 -0
- pirn/exceptions/data_integrity_error.py +7 -0
- pirn/exceptions/duplicate_knot_error.py +7 -0
- pirn/exceptions/invalid_branch_error.py +7 -0
- pirn/exceptions/pipeline_load_error.py +7 -0
- pirn/exceptions/pirn_config_error.py +7 -0
- pirn/exceptions/pirn_error.py +5 -0
- pirn/exceptions/tapestry_error.py +7 -0
- pirn/exceptions/unbound_parameter_error.py +7 -0
- pirn/managers/__init__.py +1 -0
- pirn/managers/exception_manager.py +67 -0
- pirn/managers/exception_record.py +39 -0
- pirn/managers/knot_state.py +13 -0
- pirn/managers/rebindable_exception.py +24 -0
- pirn/managers/redact.py +26 -0
- pirn/managers/status_event.py +19 -0
- pirn/managers/status_manager.py +59 -0
- pirn/nodes/AGENTIC_USE.md +186 -0
- pirn/nodes/__init__.py +14 -0
- pirn/nodes/aggregator.py +125 -0
- pirn/nodes/branch/__init__.py +0 -0
- pirn/nodes/branch/_branch_not_selected.py +5 -0
- pirn/nodes/branch/branch.py +142 -0
- pirn/nodes/branch/branch_output.py +77 -0
- pirn/nodes/continuation.py +201 -0
- pirn/nodes/gate/__init__.py +0 -0
- pirn/nodes/gate/_gate_closed.py +5 -0
- pirn/nodes/gate/gate.py +104 -0
- pirn/nodes/loop_sub_tapestry.py +276 -0
- pirn/nodes/map_markers.py +112 -0
- pirn/nodes/reduce_.py +154 -0
- pirn/nodes/sink.py +66 -0
- pirn/nodes/source.py +68 -0
- pirn/nodes/sub_tapestry.py +249 -0
- pirn/replay.py +156 -0
- pirn/streaming/AGENTIC_USE.md +125 -0
- pirn/streaming/__init__.py +20 -0
- pirn/streaming/base.py +103 -0
- pirn/streaming/file_tail.py +75 -0
- pirn/streaming/iterable.py +48 -0
- pirn/streaming/kafka.py +93 -0
- pirn/streaming/trigger_adapter.py +62 -0
- pirn/tapestry.py +319 -0
- pirn/triggers/AGENTIC_USE.md +136 -0
- pirn/triggers/__init__.py +11 -0
- pirn/triggers/base.py +91 -0
- pirn/triggers/cron.py +150 -0
- pirn/triggers/http.py +238 -0
- pirn/triggers/kafka.py +138 -0
- pirn/triggers/valkey.py +132 -0
- pirn/viz/__init__.py +11 -0
- pirn/viz/_explore_cli.py +64 -0
- pirn/viz/_scanner.py +375 -0
- pirn/viz/_tapestry_graph.py +26 -0
- pirn/viz/explorer.py +1541 -0
- pirn/viz/html.py +427 -0
- pirn/viz/mermaid.py +149 -0
- pirn/yaml_loader/__init__.py +6 -0
- pirn/yaml_loader/loader.py +444 -0
- pirn/yaml_loader/specs/__init__.py +0 -0
- pirn/yaml_loader/specs/aggregator_spec.py +15 -0
- pirn/yaml_loader/specs/branch_spec.py +14 -0
- pirn/yaml_loader/specs/gate_spec.py +11 -0
- pirn/yaml_loader/specs/knot_spec.py +14 -0
- pirn/yaml_loader/specs/map_spec.py +15 -0
- pirn/yaml_loader/specs/node_spec.py +15 -0
- pirn/yaml_loader/specs/pipeline_spec.py +45 -0
- pirn/yaml_loader/specs/reduce_spec.py +15 -0
- pirn/yaml_loader/specs/sink_spec.py +14 -0
- pirn/yaml_loader/specs/source_spec.py +15 -0
- pirn/yaml_loader/specs/yaml_parameter_spec.py +17 -0
- pirn_core-0.4.0.dist-info/METADATA +352 -0
- pirn_core-0.4.0.dist-info/RECORD +472 -0
- pirn_core-0.4.0.dist-info/WHEEL +4 -0
- pirn_core-0.4.0.dist-info/entry_points.txt +4 -0
pirn/__init__.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""pirn — a pipeline framework where everything is a knot.
|
|
2
|
+
|
|
3
|
+
Knot discovery
|
|
4
|
+
--------------
|
|
5
|
+
At import time, pirn calls :meth:`sweet_tea.registry.Registry.fill_registry`
|
|
6
|
+
over its own package tree. Every :class:`pirn.core.knot.Knot` subclass shipped
|
|
7
|
+
with pirn is auto-registered under ``library="pirn"`` with the lowercase
|
|
8
|
+
class name as its registry key (CamelCase, snake_case, and no-underscore
|
|
9
|
+
variations all resolve to the same entry through sweet_tea's
|
|
10
|
+
:meth:`BaseFactory._generate_key_variations`).
|
|
11
|
+
|
|
12
|
+
This means YAML pipelines can reference any built-in pirn knot by name
|
|
13
|
+
without ``import`` boilerplate::
|
|
14
|
+
|
|
15
|
+
nodes:
|
|
16
|
+
- id: read
|
|
17
|
+
callable: object_store_read_source
|
|
18
|
+
|
|
19
|
+
YAML name resolution goes through
|
|
20
|
+
:class:`sweet_tea.abstract_inverter_factory.AbstractInverterFactory[Knot]`
|
|
21
|
+
— sweet_tea's typed factory that returns the class definition (rather than
|
|
22
|
+
instantiating it), so the loader can supply construction kwargs later.
|
|
23
|
+
|
|
24
|
+
User projects: register your own knots
|
|
25
|
+
--------------------------------------
|
|
26
|
+
If you define your own :class:`Knot` subclasses outside the pirn package
|
|
27
|
+
(e.g. ``my_company.transforms.NormaliseAddresses``), call
|
|
28
|
+
:meth:`Registry.fill_registry` from **your** project's package init so your
|
|
29
|
+
classes are auto-discovered too::
|
|
30
|
+
|
|
31
|
+
# my_company/__init__.py
|
|
32
|
+
from sweet_tea.registry import Registry
|
|
33
|
+
|
|
34
|
+
Registry.fill_registry() # scans my_company/ and registers every class
|
|
35
|
+
|
|
36
|
+
After that, your knots are resolvable by name from YAML pipelines just like
|
|
37
|
+
pirn's built-ins. To restrict resolution to your library only, look up via
|
|
38
|
+
``AbstractInverterFactory[Knot].create(name, library="my_company")``.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import warnings
|
|
42
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
43
|
+
|
|
44
|
+
from sweet_tea.registry import Registry
|
|
45
|
+
from sweet_tea.sweet_tea_warning import SweetTeaWarning
|
|
46
|
+
|
|
47
|
+
# Self-register the classes shipped in the pirn package tree with sweet_tea.
# SweetTeaWarning is silenced only for the duration of this scan; the previous
# warning filters are restored when the ``with`` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=SweetTeaWarning)
    Registry.fill_registry()

# Core ships as the ``pirn-core`` distribution but imports as ``pirn``, so the
# version lookup uses the distribution name rather than the import name.
try:
    __version__ = version("pirn-core")
except PackageNotFoundError:
    # No installed distribution metadata for ``pirn-core`` was found.
    __version__ = "unknown"
|
|
56
|
+
|
|
57
|
+
# Public API re-exports — users may import from pirn directly.
|
|
58
|
+
# Registry.fill_registry() above must run first; noqa: E402 suppresses the
|
|
59
|
+
# "import not at top of file" warnings that follow from that ordering.
|
|
60
|
+
from pirn._domain_discovery import discover_installed_domains
|
|
61
|
+
from pirn.core.assembler import Assembler
|
|
62
|
+
from pirn.core.disassembler import Disassembler
|
|
63
|
+
from pirn.core.error_policy import ErrorPolicy
|
|
64
|
+
from pirn.core.knot import Knot
|
|
65
|
+
from pirn.core.knot_config import KnotConfig
|
|
66
|
+
from pirn.core.knot_factory import knot
|
|
67
|
+
from pirn.core.parameter import Parameter
|
|
68
|
+
from pirn.core.run_request import RunRequest
|
|
69
|
+
from pirn.core.run_result import RunResult
|
|
70
|
+
from pirn.nodes.loop_sub_tapestry import LoopSubTapestry
|
|
71
|
+
from pirn.nodes.sink import Sink
|
|
72
|
+
from pirn.nodes.source import Source
|
|
73
|
+
from pirn.nodes.sub_tapestry import SubTapestry
|
|
74
|
+
from pirn.tapestry import Tapestry
|
|
75
|
+
|
|
76
|
+
__all__ = [
|
|
77
|
+
"Tapestry",
|
|
78
|
+
"Knot",
|
|
79
|
+
"KnotConfig",
|
|
80
|
+
"knot",
|
|
81
|
+
"Parameter",
|
|
82
|
+
"RunRequest",
|
|
83
|
+
"RunResult",
|
|
84
|
+
"ErrorPolicy",
|
|
85
|
+
"Assembler",
|
|
86
|
+
"Disassembler",
|
|
87
|
+
"Sink",
|
|
88
|
+
"Source",
|
|
89
|
+
"SubTapestry",
|
|
90
|
+
"LoopSubTapestry",
|
|
91
|
+
"discover_installed_domains",
|
|
92
|
+
]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Discover and import installed pirn domain packages.
|
|
2
|
+
|
|
3
|
+
Domains ship as standalone distributions (``pirn-signal``, ``pirn-data`` …)
|
|
4
|
+
whose import packages are ``pirn_signal``, ``pirn_data`` … Importing each one
|
|
5
|
+
triggers its ``Registry.fill_registry()`` self-registration, making its knots
|
|
6
|
+
resolvable by bare name through sweet_tea's factory. This module finds which
|
|
7
|
+
domain distributions are installed and imports the corresponding packages.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import importlib
|
|
13
|
+
from importlib.metadata import distributions
|
|
14
|
+
from importlib.util import find_spec
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class _DomainDiscovery:
    """Locate installed pirn domain import-packages and import them.

    The six domain names are fixed framework data, kept as an instance
    attribute rather than a module constant. ``pirn`` (core) is intentionally
    excluded — it self-registers on its own import.
    """

    def __init__(self) -> None:
        self._domains: tuple[str, ...] = (
            "agents",
            "data",
            "health",
            "ml",
            "oilgas",
            "signal",
        )

    @staticmethod
    def _canonical_name(dist_name: str) -> str:
        """Return the PEP 503 normalized form of a distribution name.

        Runs of ``-``, ``_`` and ``.`` collapse to a single ``-`` and the
        result is lowercased, so ``Pirn_Signal``, ``pirn.signal`` and
        ``pirn-signal`` all compare equal.
        """
        # Imported locally so this class does not depend on the module's
        # top-level import block providing ``re``.
        import re

        return re.sub(r"[-_.]+", "-", dist_name).lower()

    def installed_import_names(self) -> tuple[str, ...]:
        """Return the ``pirn_<x>`` import names whose distribution is installed.

        Introspects installed distributions via
        :func:`importlib.metadata.distributions`, matching the canonical
        ``pirn-<domain>`` distribution names, then keeps only those whose
        import package is actually resolvable on ``sys.path``.

        Returns:
            A sorted tuple of import names; empty when no domain is installed.
        """
        wanted = {f"pirn-{domain}": f"pirn_{domain}" for domain in self._domains}
        found: set[str] = set()
        for dist in distributions():
            # ``.get`` instead of ``[...]``: subscripting a missing metadata
            # key is deprecated (it warns today and is slated to raise
            # ``KeyError``), which would abort discovery on a single
            # distribution with incomplete metadata.
            dist_name = dist.metadata.get("Name")
            if not dist_name:
                continue
            import_name = wanted.get(self._canonical_name(dist_name))
            if import_name is not None and find_spec(import_name) is not None:
                found.add(import_name)
        return tuple(sorted(found))

    def discover(self) -> tuple[str, ...]:
        """Import every installed domain package; return what was imported.

        Idempotent — re-importing an already-imported module is a no-op.
        Genuine import errors are not swallowed: they propagate wrapped in an
        :class:`ImportError` that names the offending package for context.

        Returns:
            The import names that were imported, in sorted order.

        Raises:
            ImportError: If importing a discovered domain package fails. The
                original error is chained as ``__cause__`` and the package is
                recorded on the exception's ``name`` attribute.
        """
        imported: list[str] = []
        for import_name in self.installed_import_names():
            try:
                importlib.import_module(import_name)
            except ImportError as exc:
                raise ImportError(
                    f"failed to import discovered pirn domain {import_name!r}: {exc}",
                    name=import_name,
                ) from exc
            imported.append(import_name)
        return tuple(imported)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def discover_installed_domains() -> tuple[str, ...]:
    """Import all installed pirn domain packages and return their import names.

    Each imported ``pirn_<x>`` package self-registers its knots via
    ``Registry.fill_registry()``, so once this returns those knots resolve by
    bare name through sweet_tea's factory (the same path the YAML loader
    uses). Safe to call repeatedly.

    Returns:
        The sorted tuple of import names that were imported.
    """
    discovery = _DomainDiscovery()
    return discovery.discover()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Import-compatibility codemod for the pirn monolith split (SCD-17).
|
|
2
|
+
|
|
3
|
+
When pirn was a monolith the six domains lived under ``pirn.domains.<x>``.
|
|
4
|
+
They are now standalone packages that import as ``pirn_<x>`` (for x in
|
|
5
|
+
signal, oilgas, data, ml, agents, health). This package ships a reusable,
|
|
6
|
+
idempotent, deterministic line-based rewriter that updates consumer source
|
|
7
|
+
from the old ``pirn.domains.<x>`` spellings to the new ``pirn_<x>`` ones.
|
|
8
|
+
|
|
9
|
+
It is exposed to end users as the ``pirn-migrate-imports`` console script
|
|
10
|
+
(see ``pirn._migrate.main``) and reused as the migration tool in SCD-23.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Line-based rewriter for the ``pirn.domains.<x>`` -> ``pirn_<x>`` split.
|
|
2
|
+
|
|
3
|
+
See :mod:`pirn._migrate` for background. The rewriter is a pure text
|
|
4
|
+
transform: it operates line by line on ``.py`` source, only ever touching
|
|
5
|
+
``import`` / ``from ... import`` statements that reference one of the six
|
|
6
|
+
known domains, and otherwise preserves formatting, indentation and comments
|
|
7
|
+
byte-for-byte. It is idempotent (a rewritten file is a fixed point) and
|
|
8
|
+
deterministic (identical input always yields identical output).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ImportRewriter:
    """Rewrites legacy ``pirn.domains.<x>`` imports to ``pirn_<x>``.

    The set of domains is fixed framework data (the monolith carved out
    exactly these six packages), so it is stored as a lowercase class
    attribute rather than a configurable constant. Only these names are
    ever rewritten; any other ``pirn.domains.<other>`` reference (e.g.
    ``pirn.domains.extras_loader``) is left untouched.

    Everything outside the rewritten module path is preserved exactly:
    indentation, spacing, trailing whitespace, trailing text and the
    original line terminator (LF, CRLF, or none on an unterminated last
    line). Files are read and written without newline translation so a
    CRLF file stays a CRLF file.
    """

    _domains: tuple[str, ...] = (
        "signal",
        "oilgas",
        "data",
        "ml",
        "agents",
        "health",
    )

    # Literal text that directly precedes the domain name in the two
    # "splice" rules; its length locates the start of the legacy path.
    _legacy_prefix: str = "pirn.domains."

    def __init__(self) -> None:
        domain_alt = "|".join(self._domains)
        # The captured domain is followed by one of: end-of-token (whitespace,
        # `.`, `,`, end-of-line) — never another identifier character — so a
        # non-domain like `pirn.domains.datasource` cannot match `data`.
        boundary = r"(?![A-Za-z0-9_])"

        # `from pirn.domains import <x>[ as alias]` -> `import pirn_<x>[ as alias]`.
        # Only handled when the imported name is a single bare domain. The
        # trailing whitespace (including the line terminator) is captured so
        # it can be written back unchanged.
        self._from_domains_import = re.compile(
            rf"^(?P<indent>\s*)from\s+pirn\.domains\s+import\s+"
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<alias>\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?(?P<trailing>\s*)$"
        )

        # `from pirn.domains.<x>[.sub...] import ...` -> `from pirn_<x>[.sub...] import ...`.
        # `\b` (rather than a mandatory whitespace character) also accepts
        # `import(` and never consumes the line terminator.
        self._from_submodule = re.compile(
            rf"^(?P<indent>\s*)from\s+pirn\.domains\."
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<tail>(?:\.[A-Za-z_][A-Za-z0-9_]*)*)\s+import\b"
        )

        # `import pirn.domains.<x>[.sub...][ as alias]` -> `import pirn_<x>[.sub...][ as alias]`.
        self._import_module = re.compile(
            rf"^(?P<indent>\s*)import\s+pirn\.domains\."
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<tail>(?:\.[A-Za-z_][A-Za-z0-9_]*)*)"
            rf"(?P<rest>\s+as\s+[A-Za-z_][A-Za-z0-9_]*\s*|\s*)$"
        )

    def rewrite_line(self, line: str) -> str:
        """Rewrite a single source line, returning it unchanged if no rule applies.

        ``line`` may or may not carry a line terminator; whatever terminator
        (and trailing whitespace) it has is kept as-is in the result.
        """
        match = self._from_domains_import.match(line)
        if match is not None:
            # NOTE(review): without an alias this changes the bound name from
            # `<x>` to `pirn_<x>`; a line-based tool cannot rename later uses.
            alias = match.group("alias") or ""
            return (
                f"{match.group('indent')}import pirn_{match.group('domain')}"
                f"{alias}{match.group('trailing')}"
            )

        # Both remaining rules only swap the `pirn.domains.` prefix for
        # `pirn_`, so splice the replacement in and keep every other
        # character of the line exactly as written.
        match = self._from_submodule.match(line) or self._import_module.match(line)
        if match is not None:
            domain_start = match.start("domain")
            prefix_start = domain_start - len(self._legacy_prefix)
            return f"{line[:prefix_start]}pirn_{line[domain_start:]}"

        return line

    def rewrite_text(self, source: str) -> str:
        """Rewrite every applicable import line in a source string."""
        # Cheap pre-filter: no rule can fire without this substring. Checking
        # the bare prefix also covers `pirn.domains<TAB>import ...`.
        if "pirn.domains" not in source:
            return source
        lines = source.splitlines(keepends=True)
        return "".join(self.rewrite_line(line) for line in lines)

    @staticmethod
    def _read_source(path: Path) -> str:
        """Read ``path`` as UTF-8 without translating line terminators."""
        with path.open("r", encoding="utf-8", newline="") as handle:
            return handle.read()

    def rewrite_file(self, path: Path) -> bool:
        """Rewrite a file in place. Returns ``True`` iff its contents changed.

        The file is only written when the rewritten text differs, and it is
        written with ``newline=""`` so existing line endings are not
        converted to the platform default.
        """
        original = self._read_source(path)
        rewritten = self.rewrite_text(original)
        if rewritten == original:
            return False
        with path.open("w", encoding="utf-8", newline="") as handle:
            handle.write(rewritten)
        return True

    def file_needs_rewrite(self, path: Path) -> bool:
        """Return ``True`` iff the file would change, without writing it."""
        original = self._read_source(path)
        return self.rewrite_text(original) != original
|
pirn/_migrate/main.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""CLI for the ``pirn.domains.<x>`` -> ``pirn_<x>`` import codemod (SCD-17).
|
|
2
|
+
|
|
3
|
+
Exposed as the ``pirn-migrate-imports`` console script. Accepts one or more
|
|
4
|
+
files or directories. Directories are walked recursively for ``.py`` files.
|
|
5
|
+
By default rewrites in place; ``--check`` is a dry run that reports what
|
|
6
|
+
would change and exits non-zero if any file needs rewriting. Output is a
|
|
7
|
+
deterministic, sorted summary.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from pirn._migrate.import_rewriter import ImportRewriter
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _collect_py_files(paths: list[str]) -> list[Path]:
    """Gather the ``.py`` files named by, or found beneath, ``paths``.

    A directory contributes every regular ``*.py`` file below it (searched
    recursively). Any other argument is kept only when it has a ``.py``
    suffix and is an existing regular file; everything else is ignored.
    The result contains each path once, in sorted order.
    """
    unique: set[Path] = set()
    for candidate in map(Path, paths):
        if candidate.is_dir():
            for entry in candidate.rglob("*.py"):
                if entry.is_file():
                    unique.add(entry)
            continue
        if candidate.suffix == ".py" and candidate.is_file():
            unique.add(candidate)
    return sorted(unique)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``pirn-migrate-imports`` console script.

    Args:
        argv: Command-line arguments to parse; ``None`` lets ``argparse``
            fall back to ``sys.argv[1:]``.

    Returns:
        ``1`` when ``--check`` is given and at least one file would change,
        otherwise ``0``.
    """
    parser = argparse.ArgumentParser(
        prog="pirn-migrate-imports",
        description=(
            "Rewrite legacy `pirn.domains.<x>` imports to the standalone "
            "`pirn_<x>` packages (x in signal, oilgas, data, ml, agents, health)."
        ),
    )
    parser.add_argument(
        "paths",
        nargs="+",
        help="Files or directories to rewrite (directories are walked recursively).",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Dry run: report files that need rewriting and exit non-zero, writing nothing.",
    )
    args = parser.parse_args(argv)

    # A mistyped path would otherwise be dropped silently and the run would
    # report "no changes" with exit status 0 — misleading for a `--check`
    # gate. Warn on stderr so stdout and the exit code stay unchanged.
    for raw in args.paths:
        if not Path(raw).exists():
            print(f"warning: path does not exist: {raw}", file=sys.stderr)

    rewriter = ImportRewriter()
    files = _collect_py_files(args.paths)
    changed: list[Path] = []

    for path in files:
        if args.check:
            # Dry run: only ask whether the file would change.
            if rewriter.file_needs_rewrite(path):
                changed.append(path)
        elif rewriter.rewrite_file(path):
            changed.append(path)

    verb = "would rewrite" if args.check else "rewrote"
    for path in changed:
        print(f"{verb}: {path}")

    if not changed:
        print(f"no changes — scanned {len(files)} file(s)")
        return 0

    print(f"{verb} {len(changed)} of {len(files)} file(s)")
    return 1 if args.check else 0
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
if __name__ == "__main__":
|
|
76
|
+
sys.exit(main())
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
`pirn.backends` provides pluggable implementations of the three storage protocols (`TapestryStore`, `RunHistory`, `DataStore`) and the cloud object store base — it does not execute pipelines or process domain data.
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
## Mental model
|
|
6
|
+
|
|
7
|
+
Every `Tapestry` holds three backend slots, each independently swappable:
|
|
8
|
+
|
|
9
|
+
| Slot | Protocol | Stores | Default |
|
|
10
|
+
|------|----------|--------|---------|
|
|
11
|
+
| `store` | `TapestryStore` | Knot registrations (the pipeline definition) | `InMemoryStore` |
|
|
12
|
+
| `history` | `RunHistory` | Run results and per-knot lineage records | `InMemoryHistory` |
|
|
13
|
+
| `data` | `DataStore` | Intermediate values keyed by content hash | `InMemoryDataStore` |
|
|
14
|
+
|
|
15
|
+
Pass backends to `Tapestry(store=..., history=..., data=...)`. Backends that are not passed default to their in-memory counterparts. The three slots are decoupled — you can persist lineage to SQLite while keeping values in memory.
|
|
16
|
+
|
|
17
|
+
`SubscribableStore` is a mixin implemented by `InMemoryStore`, `PostgresStore`, and `ValKeyStore`. It adds `subscribe()` for live notifications when knots are registered — required for `WithContinuation` and extensible runs.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Source map
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
pirn/backends/
|
|
25
|
+
├── base/
|
|
26
|
+
│ ├── tapestry_store.py TapestryStore — interface: register, get, all, snapshot
|
|
27
|
+
│ ├── run_history.py RunHistory — interface: record_run, get_run, query_lineage_*
|
|
28
|
+
│ ├── data_store.py DataStore — interface: put, get, has, delete
|
|
29
|
+
│ ├── subscribable_store.py SubscribableStore — mixin: subscribe() for live registration events
|
|
30
|
+
│ └── tapestry_snapshot.py TapestrySnapshot — frozen Pydantic model: ordered knot id list
|
|
31
|
+
├── in_memory/
|
|
32
|
+
│ ├── in_memory_store.py InMemoryStore — TapestryStore + SubscribableStore; default
|
|
33
|
+
│ ├── in_memory_history.py InMemoryHistory — RunHistory; default; not persistent
|
|
34
|
+
│ └── in_memory_data_store.py InMemoryDataStore — DataStore; default; not persistent
|
|
35
|
+
├── sqlite/
|
|
36
|
+
│ ├── sqlite_store.py SQLiteStore — TapestryStore backed by SQLite
|
|
37
|
+
│ └── sqlite_history.py SQLiteHistory — RunHistory backed by SQLite; durable
|
|
38
|
+
├── postgres/
|
|
39
|
+
│ ├── postgres_store.py PostgresStore — TapestryStore + SubscribableStore; asyncpg
|
|
40
|
+
│ └── postgres_history.py PostgresHistory — RunHistory backed by Postgres; durable
|
|
41
|
+
├── valkey/
|
|
42
|
+
│ ├── valkey_store.py ValKeyStore — TapestryStore + SubscribableStore; Valkey/Redis
|
|
43
|
+
│ └── valkey_data_store.py ValKeyDataStore — DataStore backed by Valkey/Redis; pickle-serialised
|
|
44
|
+
├── duckdb.py DuckDBHistory — RunHistory backed by DuckDB; analytical queries
|
|
45
|
+
├── s3.py S3DataStore — DataStore backed by AWS S3; pickle-serialised
|
|
46
|
+
├── gcs.py GCSDataStore — DataStore backed by Google Cloud Storage
|
|
47
|
+
├── azure.py AzureBlobDataStore — DataStore backed by Azure Blob Storage
|
|
48
|
+
└── disk.py LocalDiskDataStore — DataStore backed by local filesystem; pickle-serialised
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Canonical pattern
|
|
54
|
+
|
|
55
|
+
### Development — all in memory (default)
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from pirn import Tapestry, RunRequest
|
|
59
|
+
|
|
60
|
+
# No backends passed — all three slots use in-memory defaults.
|
|
61
|
+
with Tapestry() as t:
|
|
62
|
+
...
|
|
63
|
+
|
|
64
|
+
result = await t.run(RunRequest())
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Production — durable lineage, in-memory values
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from pirn import Tapestry, RunRequest
|
|
71
|
+
from pirn.backends.sqlite.sqlite_history import SQLiteHistory
|
|
72
|
+
|
|
73
|
+
history = SQLiteHistory(path="pirn.db")
|
|
74
|
+
|
|
75
|
+
with Tapestry(history=history) as t:
|
|
76
|
+
...
|
|
77
|
+
|
|
78
|
+
result = await t.run(RunRequest())
|
|
79
|
+
# result.lineage is now persisted across process restarts
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Querying lineage across runs
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
records = await history.query_lineage_by_knot_id("my-knot-id")
|
|
86
|
+
for rec in records:
|
|
87
|
+
print(rec.run_id, rec.outcome, rec.output_hash)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Persisting intermediate values (S3)
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from pirn.backends.s3 import S3DataStore
|
|
94
|
+
|
|
95
|
+
data = S3DataStore(bucket="my-pirn-bucket", prefix="runs/")
|
|
96
|
+
with Tapestry(data=data) as t:
|
|
97
|
+
...
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Shared tapestry definition (Postgres — multi-process)
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from pirn.backends.postgres.postgres_store import PostgresStore
|
|
104
|
+
from pirn.backends.postgres.postgres_history import PostgresHistory
|
|
105
|
+
|
|
106
|
+
store = PostgresStore(dsn="postgresql://user:pass@host/pirn")
|
|
107
|
+
history = PostgresHistory(dsn="postgresql://user:pass@host/pirn")
|
|
108
|
+
|
|
109
|
+
with Tapestry(store=store, history=history) as t:
|
|
110
|
+
...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Anti-patterns
|
|
116
|
+
|
|
117
|
+
### Using pickle-based DataStores with untrusted infrastructure
|
|
118
|
+
|
|
119
|
+
`S3DataStore`, `GCSDataStore`, `AzureBlobDataStore`, `LocalDiskDataStore`, and `ValKeyDataStore` serialise values with `pickle`. Any store writable by an adversary can execute arbitrary code on deserialisation. Only use these backends when the backing store is fully access-controlled.
|
|
120
|
+
|
|
121
|
+
### Assuming `InMemoryHistory` persists across runs
|
|
122
|
+
|
|
123
|
+
`InMemoryHistory` holds results in a dict for the lifetime of the process. Restarting the process loses all lineage. Use `SQLiteHistory`, `PostgresHistory`, or `DuckDBHistory` for durability.
|
|
124
|
+
|
|
125
|
+
### Using extensible runs with a non-subscribable `TapestryStore`
|
|
126
|
+
|
|
127
|
+
`tapestry.run(extensible=True)` (required by `WithContinuation` and `LoopSubTapestry`) calls `get_current_store()` mid-run to register new knots. Only `InMemoryStore`, `PostgresStore`, and `ValKeyStore` (all `SubscribableStore` implementors) support this. `SQLiteStore` does not.
|
|
128
|
+
|
|
129
|
+
### Scrubbing `DataStore` values and expecting lineage to break
|
|
130
|
+
|
|
131
|
+
`DataStore` and `RunHistory` are decoupled by design. Deleting a value from the data store removes the payload but leaves the lineage hash record intact. This is intentional for GDPR-style scrubbing.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Constraints and gotchas
|
|
136
|
+
|
|
137
|
+
- **`SQLiteHistory` runs migrations on first open.** The first `SQLiteHistory(path=...)` call creates the schema. Concurrent first-opens from multiple processes can race — initialise from a single process or use Postgres for multi-process deployments.
|
|
138
|
+
- **`DuckDBHistory` is optimised for analytical queries, not writes.** Use it for offline lineage analysis, not as the primary history backend of a high-throughput pipeline.
|
|
139
|
+
- **`ValKeyDataStore` and `LocalDiskDataStore` are pickle-based.** See anti-pattern above.
|
|
140
|
+
- **`PostgresStore` and `ValKeyStore` implement `SubscribableStore`.** If you need extensible runs in a distributed deployment, these are the only backends that support it.
|
|
141
|
+
- **Backend constructors are synchronous; connections are lazy.** `PostgresStore(dsn=...)` does not open a connection immediately. The first operation opens it. Call `await backend.close()` when done.
|
|
142
|
+
- **`DataStore.has()` is a cheap existence check** — use it before `get()` when a miss is a valid path, rather than catching `KeyError`.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Quick reference
|
|
147
|
+
|
|
148
|
+
| Task | How |
|
|
149
|
+
|------|-----|
|
|
150
|
+
| Default (dev, no persistence) | `Tapestry()` — all in-memory |
|
|
151
|
+
| Durable lineage (single process) | `Tapestry(history=SQLiteHistory(path="pirn.db"))` |
|
|
152
|
+
| Durable lineage (multi-process) | `Tapestry(history=PostgresHistory(dsn=...))` |
|
|
153
|
+
| Analytical lineage queries | `DuckDBHistory(path="lineage.duckdb")` |
|
|
154
|
+
| Persist intermediate values to S3 | `Tapestry(data=S3DataStore(bucket=..., prefix=...))` |
|
|
155
|
+
| Persist intermediate values to GCS | `Tapestry(data=GCSDataStore(bucket=..., prefix=...))` |
|
|
156
|
+
| Persist intermediate values to Azure | `Tapestry(data=AzureBlobDataStore(container=..., prefix=...))` |
|
|
157
|
+
| Persist intermediate values to disk | `Tapestry(data=LocalDiskDataStore(root=Path("/data")))` |
|
|
158
|
+
| Shared tapestry definition (multi-process) | `Tapestry(store=PostgresStore(dsn=...))` |
|
|
159
|
+
| Shared tapestry + extensible runs | `Tapestry(store=PostgresStore(dsn=...), ...)` — `SubscribableStore` required |
|
|
160
|
+
| Query lineage by knot id | `await history.query_lineage_by_knot_id("my-id")` |
|
|
161
|
+
| Query lineage by output hash | `await history.query_lineage_by_output_hash("sha256:abc...")` |
|
|
162
|
+
| Check if a value is cached | `await data_store.has("sha256:abc...")` |
|
|
163
|
+
| Retrieve a cached value | `await data_store.get("sha256:abc...")` |
|
|
164
|
+
| Scrub a value (GDPR) | `await data_store.delete("sha256:abc...")` — lineage record is preserved |
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
*See also: [pirn AGENTIC_USE.md](../../AGENTIC_USE.md)*
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Backend implementations for pirn.
|
|
2
|
+
|
|
3
|
+
Interface base classes live in ``pirn.backends.base``.
|
|
4
|
+
Implementations:
|
|
5
|
+
- ``pirn.backends.in_memory`` — in-memory (default, single-process)
|
|
6
|
+
- ``pirn.backends.sqlite`` — SQLite (durable, single-host)
|
|
7
|
+
- ``pirn.backends.postgres`` — PostgreSQL via asyncpg
|
|
8
|
+
- ``pirn.backends.valkey`` — ValKey/Redis
|
|
9
|
+
- ``pirn.backends.duckdb`` — DuckDB (analytics history)
|
|
10
|
+
- ``pirn.backends.disk`` — local disk data store
|
|
11
|
+
- ``pirn.backends.s3``, ``pirn.backends.gcs``, ``pirn.backends.azure`` — cloud object data stores (S3, GCS, Azure Blob)
|
|
12
|
+
"""
|
pirn/backends/_signer.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""HMAC-SHA256 payload signing for DataStore backends.
|
|
2
|
+
|
|
3
|
+
Prevents insecure deserialization of tampered payloads (security finding C-1).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import base64
|
|
9
|
+
import hashlib
|
|
10
|
+
import hmac
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class _Signer:
    """Signs and verifies cloudpickle payloads with HMAC-SHA256.

    A signed payload is the 32-byte digest followed by the raw bytes.
    The digest size is fixed at 32 bytes by the SHA256 algorithm.
    """

    __digest_size = 32

    def __init__(self, key: bytes) -> None:
        """Store ``key`` as the HMAC secret used by :meth:`sign` and :meth:`verify`."""
        self.__key = key

    @classmethod
    def from_env(cls, var: str = "PIRN_SIGNING_KEY") -> _Signer:
        """Construct a _Signer from a base64-encoded key in an environment variable.

        Surrounding whitespace (such as a trailing newline) is ignored. The
        value is decoded strictly: characters outside the standard base64
        alphabet are rejected instead of being silently dropped, because
        dropping them would yield a key different from the configured one.

        Raises ``ValueError`` if the variable is unset or empty, is not valid
        base64, or decodes to fewer than 32 bytes.

        Example::

            import secrets, base64
            key_b64 = base64.b64encode(secrets.token_bytes(32)).decode()
            # Set PIRN_SIGNING_KEY=<key_b64> in your environment, then:
            store = LocalDiskDataStore("/data", signer=_Signer.from_env())
        """
        raw = os.environ.get(var, "").strip()
        if not raw:
            raise ValueError(
                f"Environment variable {var!r} is not set or empty. "
                "Set it to a base64-encoded signing key before constructing a signed DataStore."
            )
        try:
            decoded = base64.b64decode(raw, validate=True)
        except ValueError as exc:
            # binascii.Error subclasses ValueError; re-raise with the variable
            # name for context (the message never contains key material).
            raise ValueError(
                f"Environment variable {var!r} is not valid base64: {exc}"
            ) from exc
        if len(decoded) < 32:
            raise ValueError(
                f"Environment variable {var!r} decoded to {len(decoded)} bytes; "
                "HMAC-SHA256 requires at least 32 bytes of key material. "
                'Generate a key with: python -c "import secrets,base64; print(base64.b64encode(secrets.token_bytes(32)).decode())"'
            )
        return cls(decoded)

    @classmethod
    def test_signer(cls) -> _Signer:
        """Return a deterministic signer for unit tests.

        Tests that exercise the signing path use this so they don't need
        env-var setup or real key material. **Never use in production.**
        Production signers must come from :meth:`from_env` or a manual
        construction with a per-deployment key.

        Raises:
            RuntimeError: If called outside a test or CI environment
                (i.e. when PIRN_ENV is not set to "test" or "ci").
        """
        env = os.environ.get("PIRN_ENV", "").lower()
        if env not in ("test", "ci"):
            raise RuntimeError(
                "_Signer.test_signer() must not be called in production. "
                "Set PIRN_ENV=test or PIRN_ENV=ci to use this method in a "
                "test or CI environment. Use _Signer.from_env() for production."
            )
        return cls(b"pirn-test-signer-key-not-for-production")

    def sign(self, payload: bytes) -> bytes:
        """Prepend a 32-byte HMAC-SHA256 signature to payload."""
        sig = hmac.new(self.__key, payload, hashlib.sha256).digest()
        return sig + payload

    def verify(self, payload: bytes) -> bytes:
        """Verify the HMAC-SHA256 signature and return the raw payload.

        Raises ``ValueError`` if the payload is too short or the signature
        does not match.
        """
        if len(payload) < self.__digest_size:
            raise ValueError("payload too short to contain a signature — possible tampering")
        sig, raw = payload[: self.__digest_size], payload[self.__digest_size :]
        expected = hmac.new(self.__key, raw, hashlib.sha256).digest()
        # Constant-time comparison of the stored and recomputed digests.
        if not hmac.compare_digest(sig, expected):
            raise ValueError("HMAC signature mismatch — payload may have been tampered with")
        return raw
|