pirn-core 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (472) hide show
  1. pirn/__init__.py +92 -0
  2. pirn/_domain_discovery.py +83 -0
  3. pirn/_migrate/__init__.py +13 -0
  4. pirn/_migrate/import_rewriter.py +110 -0
  5. pirn/_migrate/main.py +76 -0
  6. pirn/backends/AGENTIC_USE.md +168 -0
  7. pirn/backends/__init__.py +12 -0
  8. pirn/backends/_signer.py +92 -0
  9. pirn/backends/azure.py +95 -0
  10. pirn/backends/base/__init__.py +0 -0
  11. pirn/backends/base/_cloud_object_store.py +226 -0
  12. pirn/backends/base/data_store.py +52 -0
  13. pirn/backends/base/run_history.py +118 -0
  14. pirn/backends/base/subscribable_store.py +44 -0
  15. pirn/backends/base/tapestry_snapshot.py +16 -0
  16. pirn/backends/base/tapestry_store.py +51 -0
  17. pirn/backends/disk.py +145 -0
  18. pirn/backends/duckdb.py +305 -0
  19. pirn/backends/gcs.py +89 -0
  20. pirn/backends/in_memory/__init__.py +0 -0
  21. pirn/backends/in_memory/in_memory_data_store.py +66 -0
  22. pirn/backends/in_memory/in_memory_history.py +140 -0
  23. pirn/backends/in_memory/in_memory_store.py +110 -0
  24. pirn/backends/postgres/__init__.py +4 -0
  25. pirn/backends/postgres/_lazy_pool.py +72 -0
  26. pirn/backends/postgres/postgres_history.py +362 -0
  27. pirn/backends/postgres/postgres_store.py +297 -0
  28. pirn/backends/s3.py +178 -0
  29. pirn/backends/sqlite/__init__.py +4 -0
  30. pirn/backends/sqlite/_migrations.py +30 -0
  31. pirn/backends/sqlite/sqlite_history.py +382 -0
  32. pirn/backends/sqlite/sqlite_store.py +151 -0
  33. pirn/backends/valkey/__init__.py +4 -0
  34. pirn/backends/valkey/_lazy_client.py +57 -0
  35. pirn/backends/valkey/valkey_data_store.py +159 -0
  36. pirn/backends/valkey/valkey_store.py +249 -0
  37. pirn/check/__init__.py +0 -0
  38. pirn/check/_loader.py +31 -0
  39. pirn/check/main.py +44 -0
  40. pirn/check/validation_issue.py +14 -0
  41. pirn/check/validation_result.py +22 -0
  42. pirn/check/validator.py +86 -0
  43. pirn/connectors/AGENTIC_USE.md +171 -0
  44. pirn/connectors/__init__.py +17 -0
  45. pirn/connectors/api_client.py +93 -0
  46. pirn/connectors/bi_catalog/AGENTIC_USE.md +107 -0
  47. pirn/connectors/bi_catalog/__init__.py +1 -0
  48. pirn/connectors/bi_catalog/airbyte_client.py +178 -0
  49. pirn/connectors/bi_catalog/airbyte_config.py +41 -0
  50. pirn/connectors/bi_catalog/alation_client.py +215 -0
  51. pirn/connectors/bi_catalog/alation_config.py +36 -0
  52. pirn/connectors/bi_catalog/datahub_client.py +212 -0
  53. pirn/connectors/bi_catalog/datahub_config.py +28 -0
  54. pirn/connectors/bi_catalog/dbt_artifacts_config.py +24 -0
  55. pirn/connectors/bi_catalog/dbt_artifacts_reader.py +184 -0
  56. pirn/connectors/bi_catalog/fivetran_client.py +172 -0
  57. pirn/connectors/bi_catalog/fivetran_config.py +33 -0
  58. pirn/connectors/bi_catalog/open_metadata_client.py +219 -0
  59. pirn/connectors/bi_catalog/open_metadata_config.py +28 -0
  60. pirn/connectors/capabilities/__init__.py +8 -0
  61. pirn/connectors/capabilities/event_emitter.py +40 -0
  62. pirn/connectors/capabilities/metadata_catalog.py +36 -0
  63. pirn/connectors/capabilities/metric_query.py +33 -0
  64. pirn/connectors/capabilities/record_writer.py +26 -0
  65. pirn/connectors/capabilities/table_source.py +47 -0
  66. pirn/connectors/connection_config.py +101 -0
  67. pirn/connectors/connection_config_decorator.py +52 -0
  68. pirn/connectors/database_connection_pool.py +110 -0
  69. pirn/connectors/databases/AGENTIC_USE.md +102 -0
  70. pirn/connectors/databases/__init__.py +0 -0
  71. pirn/connectors/databases/_bigquery_stub_job_config.py +18 -0
  72. pirn/connectors/databases/bigquery_config.py +34 -0
  73. pirn/connectors/databases/bigquery_pool.py +196 -0
  74. pirn/connectors/databases/clickhouse_config.py +37 -0
  75. pirn/connectors/databases/clickhouse_pool.py +175 -0
  76. pirn/connectors/databases/databricks_config.py +35 -0
  77. pirn/connectors/databases/databricks_pool.py +153 -0
  78. pirn/connectors/databases/dremio_config.py +41 -0
  79. pirn/connectors/databases/dremio_pool.py +134 -0
  80. pirn/connectors/databases/duckdb_config.py +60 -0
  81. pirn/connectors/databases/duckdb_pool.py +89 -0
  82. pirn/connectors/databases/mssql_config.py +51 -0
  83. pirn/connectors/databases/mssql_pool.py +163 -0
  84. pirn/connectors/databases/mysql_config.py +38 -0
  85. pirn/connectors/databases/mysql_pool.py +185 -0
  86. pirn/connectors/databases/oracle_config.py +35 -0
  87. pirn/connectors/databases/oracle_pool.py +166 -0
  88. pirn/connectors/databases/postgres_config.py +30 -0
  89. pirn/connectors/databases/postgres_pool.py +107 -0
  90. pirn/connectors/databases/redshift_config.py +31 -0
  91. pirn/connectors/databases/redshift_pool.py +111 -0
  92. pirn/connectors/databases/snowflake_config.py +33 -0
  93. pirn/connectors/databases/snowflake_pool.py +156 -0
  94. pirn/connectors/databases/sqlite_config.py +56 -0
  95. pirn/connectors/databases/sqlite_pool.py +105 -0
  96. pirn/connectors/document/AGENTIC_USE.md +94 -0
  97. pirn/connectors/document/__init__.py +0 -0
  98. pirn/connectors/document/arangodb_config.py +22 -0
  99. pirn/connectors/document/arangodb_pool.py +101 -0
  100. pirn/connectors/document/cosmosdb_config.py +29 -0
  101. pirn/connectors/document/cosmosdb_pool.py +113 -0
  102. pirn/connectors/document/couchbase_config.py +31 -0
  103. pirn/connectors/document/couchbase_pool.py +107 -0
  104. pirn/connectors/document/couchdb_config.py +21 -0
  105. pirn/connectors/document/couchdb_pool.py +120 -0
  106. pirn/connectors/document/firestore_config.py +27 -0
  107. pirn/connectors/document/firestore_pool.py +134 -0
  108. pirn/connectors/document/mongodb_config.py +30 -0
  109. pirn/connectors/document/mongodb_pool.py +121 -0
  110. pirn/connectors/dsn_scrubber.py +40 -0
  111. pirn/connectors/file_format.py +70 -0
  112. pirn/connectors/file_formats/AGENTIC_USE.md +228 -0
  113. pirn/connectors/file_formats/__init__.py +10 -0
  114. pirn/connectors/file_formats/_html_stripper.py +33 -0
  115. pirn/connectors/file_formats/_sam_utils.py +183 -0
  116. pirn/connectors/file_formats/aac_format.py +76 -0
  117. pirn/connectors/file_formats/archive_file_format.py +276 -0
  118. pirn/connectors/file_formats/arrow_ipc_format.py +101 -0
  119. pirn/connectors/file_formats/asdf_format.py +88 -0
  120. pirn/connectors/file_formats/avro_format.py +106 -0
  121. pirn/connectors/file_formats/bam_format.py +107 -0
  122. pirn/connectors/file_formats/batch_file_format.py +51 -0
  123. pirn/connectors/file_formats/bcf_format.py +233 -0
  124. pirn/connectors/file_formats/bdf_format.py +203 -0
  125. pirn/connectors/file_formats/bids_dataset_format.py +136 -0
  126. pirn/connectors/file_formats/brainvision_format.py +339 -0
  127. pirn/connectors/file_formats/cda_xml_format.py +179 -0
  128. pirn/connectors/file_formats/codec.py +28 -0
  129. pirn/connectors/file_formats/codecs/__init__.py +8 -0
  130. pirn/connectors/file_formats/codecs/bzip2_codec.py +49 -0
  131. pirn/connectors/file_formats/codecs/gzip_codec.py +52 -0
  132. pirn/connectors/file_formats/codecs/lz4_codec.py +53 -0
  133. pirn/connectors/file_formats/codecs/snappy_codec.py +56 -0
  134. pirn/connectors/file_formats/codecs/zstd_codec.py +68 -0
  135. pirn/connectors/file_formats/compressed_file_format.py +113 -0
  136. pirn/connectors/file_formats/cram_format.py +127 -0
  137. pirn/connectors/file_formats/csv_format.py +167 -0
  138. pirn/connectors/file_formats/define_xml_format.py +136 -0
  139. pirn/connectors/file_formats/dicom_format.py +294 -0
  140. pirn/connectors/file_formats/dlis_format.py +79 -0
  141. pirn/connectors/file_formats/docx_format.py +98 -0
  142. pirn/connectors/file_formats/edf_format.py +236 -0
  143. pirn/connectors/file_formats/edf_plus_format.py +98 -0
  144. pirn/connectors/file_formats/epub_format.py +200 -0
  145. pirn/connectors/file_formats/fasta_format.py +171 -0
  146. pirn/connectors/file_formats/fastq_format.py +161 -0
  147. pirn/connectors/file_formats/feather_format.py +76 -0
  148. pirn/connectors/file_formats/fhir_json_format.py +119 -0
  149. pirn/connectors/file_formats/fhir_xml_format.py +196 -0
  150. pirn/connectors/file_formats/fits_format.py +117 -0
  151. pirn/connectors/file_formats/flac_format.py +95 -0
  152. pirn/connectors/file_formats/geojson_format.py +144 -0
  153. pirn/connectors/file_formats/geopackage_format.py +201 -0
  154. pirn/connectors/file_formats/geotiff_format.py +208 -0
  155. pirn/connectors/file_formats/gguf_format.py +188 -0
  156. pirn/connectors/file_formats/grib_format.py +131 -0
  157. pirn/connectors/file_formats/hdf5_format.py +212 -0
  158. pirn/connectors/file_formats/heic_format.py +130 -0
  159. pirn/connectors/file_formats/hl7v2_format.py +151 -0
  160. pirn/connectors/file_formats/html_format.py +154 -0
  161. pirn/connectors/file_formats/joblib_format.py +128 -0
  162. pirn/connectors/file_formats/jpeg_format.py +121 -0
  163. pirn/connectors/file_formats/json_format.py +114 -0
  164. pirn/connectors/file_formats/jsonl_format.py +85 -0
  165. pirn/connectors/file_formats/kml_format.py +191 -0
  166. pirn/connectors/file_formats/las_format.py +106 -0
  167. pirn/connectors/file_formats/m4a_format.py +75 -0
  168. pirn/connectors/file_formats/markdown_format.py +240 -0
  169. pirn/connectors/file_formats/matlab_mat_format.py +211 -0
  170. pirn/connectors/file_formats/mp3_format.py +76 -0
  171. pirn/connectors/file_formats/mzml_format.py +201 -0
  172. pirn/connectors/file_formats/netcdf4_format.py +169 -0
  173. pirn/connectors/file_formats/netcdf_format.py +270 -0
  174. pirn/connectors/file_formats/nifti_format.py +99 -0
  175. pirn/connectors/file_formats/numpy_npy_format.py +207 -0
  176. pirn/connectors/file_formats/numpy_npz_format.py +193 -0
  177. pirn/connectors/file_formats/ods_format.py +195 -0
  178. pirn/connectors/file_formats/ogg_format.py +94 -0
  179. pirn/connectors/file_formats/onnx_format.py +107 -0
  180. pirn/connectors/file_formats/open_slide_format.py +145 -0
  181. pirn/connectors/file_formats/orc_format.py +77 -0
  182. pirn/connectors/file_formats/parquet_format.py +119 -0
  183. pirn/connectors/file_formats/pdf_format.py +139 -0
  184. pirn/connectors/file_formats/plain_text_format.py +220 -0
  185. pirn/connectors/file_formats/png_format.py +106 -0
  186. pirn/connectors/file_formats/pptx_format.py +145 -0
  187. pirn/connectors/file_formats/prodml_format.py +122 -0
  188. pirn/connectors/file_formats/pytorch_format.py +152 -0
  189. pirn/connectors/file_formats/resqml_format.py +124 -0
  190. pirn/connectors/file_formats/root_format.py +109 -0
  191. pirn/connectors/file_formats/rtf_format.py +115 -0
  192. pirn/connectors/file_formats/safetensors_format.py +185 -0
  193. pirn/connectors/file_formats/sam_format.py +110 -0
  194. pirn/connectors/file_formats/sdtm_xpt_format.py +143 -0
  195. pirn/connectors/file_formats/segd_format.py +89 -0
  196. pirn/connectors/file_formats/segy_format.py +141 -0
  197. pirn/connectors/file_formats/shapefile_format.py +162 -0
  198. pirn/connectors/file_formats/streaming_file_format.py +22 -0
  199. pirn/connectors/file_formats/tf_saved_model_format.py +136 -0
  200. pirn/connectors/file_formats/tflite_format.py +133 -0
  201. pirn/connectors/file_formats/tiff_format.py +174 -0
  202. pirn/connectors/file_formats/tsv_format.py +43 -0
  203. pirn/connectors/file_formats/vcf_format.py +231 -0
  204. pirn/connectors/file_formats/wav_format.py +75 -0
  205. pirn/connectors/file_formats/webp_format.py +135 -0
  206. pirn/connectors/file_formats/witsml_format.py +131 -0
  207. pirn/connectors/file_formats/xlsx_format.py +168 -0
  208. pirn/connectors/file_formats/zarr_format.py +258 -0
  209. pirn/connectors/graph/AGENTIC_USE.md +72 -0
  210. pirn/connectors/graph/__init__.py +0 -0
  211. pirn/connectors/graph/memgraph_config.py +23 -0
  212. pirn/connectors/graph/memgraph_pool.py +92 -0
  213. pirn/connectors/graph/neo4j_config.py +23 -0
  214. pirn/connectors/graph/neo4j_pool.py +114 -0
  215. pirn/connectors/graph/orientdb_config.py +23 -0
  216. pirn/connectors/graph/orientdb_pool.py +101 -0
  217. pirn/connectors/knots/__init__.py +0 -0
  218. pirn/connectors/knots/database_connection_pool_knot.py +33 -0
  219. pirn/connectors/knots/database_execute_sink.py +90 -0
  220. pirn/connectors/knots/database_query_source.py +86 -0
  221. pirn/connectors/knots/message_broker_knot.py +31 -0
  222. pirn/connectors/knots/message_broker_publish_sink.py +89 -0
  223. pirn/connectors/knots/object_store_knot.py +31 -0
  224. pirn/connectors/knots/object_store_list_source.py +61 -0
  225. pirn/connectors/knots/object_store_read_source.py +73 -0
  226. pirn/connectors/knots/object_store_write_sink.py +59 -0
  227. pirn/connectors/message_broker.py +54 -0
  228. pirn/connectors/messaging/AGENTIC_USE.md +91 -0
  229. pirn/connectors/messaging/__init__.py +0 -0
  230. pirn/connectors/messaging/discord_client.py +158 -0
  231. pirn/connectors/messaging/discord_config.py +33 -0
  232. pirn/connectors/messaging/google_chat_client.py +114 -0
  233. pirn/connectors/messaging/google_chat_config.py +26 -0
  234. pirn/connectors/messaging/pagerduty_client.py +206 -0
  235. pirn/connectors/messaging/pagerduty_config.py +32 -0
  236. pirn/connectors/messaging/slack_client.py +139 -0
  237. pirn/connectors/messaging/slack_config.py +33 -0
  238. pirn/connectors/messaging/teams_client.py +145 -0
  239. pirn/connectors/messaging/teams_config.py +26 -0
  240. pirn/connectors/messaging/telegram_client.py +146 -0
  241. pirn/connectors/messaging/telegram_config.py +42 -0
  242. pirn/connectors/object_storage/AGENTIC_USE.md +93 -0
  243. pirn/connectors/object_storage/__init__.py +0 -0
  244. pirn/connectors/object_storage/azure_blob_config.py +44 -0
  245. pirn/connectors/object_storage/azure_blob_store.py +146 -0
  246. pirn/connectors/object_storage/gcs_config.py +34 -0
  247. pirn/connectors/object_storage/gcs_store.py +132 -0
  248. pirn/connectors/object_storage/hdfs_config.py +39 -0
  249. pirn/connectors/object_storage/hdfs_store.py +250 -0
  250. pirn/connectors/object_storage/local_filesystem_config.py +32 -0
  251. pirn/connectors/object_storage/local_filesystem_store.py +118 -0
  252. pirn/connectors/object_storage/s3_config.py +46 -0
  253. pirn/connectors/object_storage/s3_store.py +132 -0
  254. pirn/connectors/object_store.py +64 -0
  255. pirn/connectors/observability/AGENTIC_USE.md +106 -0
  256. pirn/connectors/observability/__init__.py +1 -0
  257. pirn/connectors/observability/datadog_client.py +262 -0
  258. pirn/connectors/observability/datadog_config.py +31 -0
  259. pirn/connectors/observability/grafana_client.py +262 -0
  260. pirn/connectors/observability/grafana_config.py +27 -0
  261. pirn/connectors/observability/opentelemetry_config.py +41 -0
  262. pirn/connectors/observability/opentelemetry_span_emitter.py +122 -0
  263. pirn/connectors/observability/prometheus_client.py +193 -0
  264. pirn/connectors/observability/prometheus_config.py +28 -0
  265. pirn/connectors/saas/AGENTIC_USE.md +113 -0
  266. pirn/connectors/saas/__init__.py +1 -0
  267. pirn/connectors/saas/airtable_client.py +219 -0
  268. pirn/connectors/saas/airtable_config.py +35 -0
  269. pirn/connectors/saas/amplitude_client.py +172 -0
  270. pirn/connectors/saas/amplitude_config.py +26 -0
  271. pirn/connectors/saas/github_client.py +212 -0
  272. pirn/connectors/saas/github_config.py +35 -0
  273. pirn/connectors/saas/google_analytics_client.py +202 -0
  274. pirn/connectors/saas/google_analytics_config.py +30 -0
  275. pirn/connectors/saas/hubspot_client.py +192 -0
  276. pirn/connectors/saas/hubspot_config.py +29 -0
  277. pirn/connectors/saas/jira_client.py +206 -0
  278. pirn/connectors/saas/jira_config.py +34 -0
  279. pirn/connectors/saas/mixpanel_client.py +155 -0
  280. pirn/connectors/saas/mixpanel_config.py +35 -0
  281. pirn/connectors/saas/salesforce_client.py +242 -0
  282. pirn/connectors/saas/salesforce_config.py +36 -0
  283. pirn/connectors/saas/shopify_client.py +278 -0
  284. pirn/connectors/saas/shopify_config.py +29 -0
  285. pirn/connectors/saas/stripe_client.py +189 -0
  286. pirn/connectors/saas/stripe_config.py +27 -0
  287. pirn/connectors/saas/twilio_client.py +181 -0
  288. pirn/connectors/saas/twilio_config.py +30 -0
  289. pirn/connectors/saas/zendesk_client.py +221 -0
  290. pirn/connectors/saas/zendesk_config.py +33 -0
  291. pirn/connectors/streaming/AGENTIC_USE.md +100 -0
  292. pirn/connectors/streaming/__init__.py +0 -0
  293. pirn/connectors/streaming/azure_servicebus_broker.py +198 -0
  294. pirn/connectors/streaming/azure_servicebus_config.py +18 -0
  295. pirn/connectors/streaming/azure_servicebus_stub_message.py +25 -0
  296. pirn/connectors/streaming/kafka_broker.py +144 -0
  297. pirn/connectors/streaming/kafka_config.py +27 -0
  298. pirn/connectors/streaming/kinesis_broker.py +150 -0
  299. pirn/connectors/streaming/kinesis_config.py +26 -0
  300. pirn/connectors/streaming/pubsub_broker.py +211 -0
  301. pirn/connectors/streaming/pubsub_config.py +18 -0
  302. pirn/connectors/streaming/rabbitmq_broker.py +163 -0
  303. pirn/connectors/streaming/rabbitmq_config.py +22 -0
  304. pirn/connectors/streaming/rabbitmq_plain_message.py +23 -0
  305. pirn/connectors/streaming/valkey_record.py +52 -0
  306. pirn/connectors/streaming/valkey_stream_broker.py +128 -0
  307. pirn/connectors/streaming/valkey_stream_config.py +24 -0
  308. pirn/connectors/timeseries/AGENTIC_USE.md +109 -0
  309. pirn/connectors/timeseries/__init__.py +0 -0
  310. pirn/connectors/timeseries/influxdb_config.py +32 -0
  311. pirn/connectors/timeseries/influxdb_pool.py +124 -0
  312. pirn/connectors/timeseries/kdb_config.py +26 -0
  313. pirn/connectors/timeseries/kdb_pool.py +151 -0
  314. pirn/connectors/timeseries/questdb_config.py +32 -0
  315. pirn/connectors/timeseries/questdb_pool.py +94 -0
  316. pirn/connectors/timeseries/timescaledb_config.py +31 -0
  317. pirn/connectors/timeseries/timescaledb_pool.py +111 -0
  318. pirn/connectors/timeseries/victoriametrics_config.py +26 -0
  319. pirn/connectors/timeseries/victoriametrics_pool.py +129 -0
  320. pirn/connectors/transports/__init__.py +1 -0
  321. pirn/connectors/transports/object_store_transport.py +170 -0
  322. pirn/connectors/transports/valkey_transport.py +227 -0
  323. pirn/core/__init__.py +1 -0
  324. pirn/core/assembler.py +39 -0
  325. pirn/core/disassembler.py +43 -0
  326. pirn/core/err.py +46 -0
  327. pirn/core/error_policy.py +19 -0
  328. pirn/core/hashing.py +159 -0
  329. pirn/core/identity/__init__.py +15 -0
  330. pirn/core/identity/chained_identity_resolver.py +20 -0
  331. pirn/core/identity/env_identity_resolver.py +35 -0
  332. pirn/core/identity/identity_resolver.py +15 -0
  333. pirn/core/identity/null_identity_resolver.py +10 -0
  334. pirn/core/identity/os_identity_resolver.py +12 -0
  335. pirn/core/identity/static_identity_resolver.py +16 -0
  336. pirn/core/knot.py +611 -0
  337. pirn/core/knot_config.py +95 -0
  338. pirn/core/knot_factory.py +108 -0
  339. pirn/core/knot_source.py +86 -0
  340. pirn/core/lineage.py +133 -0
  341. pirn/core/ok.py +38 -0
  342. pirn/core/optional.py +217 -0
  343. pirn/core/parameter.py +173 -0
  344. pirn/core/parameter_spec.py +19 -0
  345. pirn/core/payload.py +62 -0
  346. pirn/core/pirn_opaque_value.py +85 -0
  347. pirn/core/providers/__init__.py +0 -0
  348. pirn/core/providers/embedding_provider.py +29 -0
  349. pirn/core/providers/llm_provider.py +66 -0
  350. pirn/core/result.py +19 -0
  351. pirn/core/run_context.py +112 -0
  352. pirn/core/run_request.py +47 -0
  353. pirn/core/run_result.py +120 -0
  354. pirn/core/sentinels/__init__.py +0 -0
  355. pirn/core/sentinels/_unset.py +9 -0
  356. pirn/core/skipped.py +33 -0
  357. pirn/core/transport/__init__.py +1 -0
  358. pirn/core/transport/data_transport.py +116 -0
  359. pirn/core/transport/dual_write_transport.py +109 -0
  360. pirn/core/transport/filesystem_transport.py +276 -0
  361. pirn/core/transport/inline_transport.py +87 -0
  362. pirn/core/transport/serializers/__init__.py +1 -0
  363. pirn/core/transport/serializers/numpy_serializer.py +53 -0
  364. pirn/core/transport/serializers/pickle_serializer.py +39 -0
  365. pirn/core/transport/serializers/serialiser_error.py +8 -0
  366. pirn/core/transport/serializers/serializer.py +46 -0
  367. pirn/core/transport/serializers/serializer_registry.py +91 -0
  368. pirn/core/transport/smart_transport.py +131 -0
  369. pirn/core/transport/transport_error.py +8 -0
  370. pirn/core/transport/transport_handle.py +49 -0
  371. pirn/domains/__init__.py +44 -0
  372. pirn/domains/_domain_compat_finder.py +143 -0
  373. pirn/domains/_domain_compat_loader.py +29 -0
  374. pirn/emitters/AGENTIC_USE.md +107 -0
  375. pirn/emitters/__init__.py +14 -0
  376. pirn/emitters/base.py +70 -0
  377. pirn/emitters/emitter_error_policy.py +16 -0
  378. pirn/emitters/kafka.py +147 -0
  379. pirn/emitters/log.py +118 -0
  380. pirn/emitters/otel.py +160 -0
  381. pirn/emitters/valkey.py +102 -0
  382. pirn/emitters/webhook.py +208 -0
  383. pirn/engine/__init__.py +1 -0
  384. pirn/engine/_emitter_subscriber.py +25 -0
  385. pirn/engine/dispatchers/AGENTIC_USE.md +134 -0
  386. pirn/engine/dispatchers/__init__.py +0 -0
  387. pirn/engine/dispatchers/celery_dispatcher.py +125 -0
  388. pirn/engine/dispatchers/dask_dispatcher.py +101 -0
  389. pirn/engine/dispatchers/dispatcher.py +22 -0
  390. pirn/engine/dispatchers/local_dispatcher.py +25 -0
  391. pirn/engine/dispatchers/ray_dispatcher.py +95 -0
  392. pirn/engine/dispatchers/thread_dispatcher.py +45 -0
  393. pirn/engine/engine.py +655 -0
  394. pirn/engine/shed/__init__.py +0 -0
  395. pirn/engine/shed/edge.py +13 -0
  396. pirn/engine/shed/shed.py +195 -0
  397. pirn/engine/shed/shed_error.py +6 -0
  398. pirn/exceptions/__init__.py +0 -0
  399. pirn/exceptions/data_integrity_error.py +7 -0
  400. pirn/exceptions/duplicate_knot_error.py +7 -0
  401. pirn/exceptions/invalid_branch_error.py +7 -0
  402. pirn/exceptions/pipeline_load_error.py +7 -0
  403. pirn/exceptions/pirn_config_error.py +7 -0
  404. pirn/exceptions/pirn_error.py +5 -0
  405. pirn/exceptions/tapestry_error.py +7 -0
  406. pirn/exceptions/unbound_parameter_error.py +7 -0
  407. pirn/managers/__init__.py +1 -0
  408. pirn/managers/exception_manager.py +67 -0
  409. pirn/managers/exception_record.py +39 -0
  410. pirn/managers/knot_state.py +13 -0
  411. pirn/managers/rebindable_exception.py +24 -0
  412. pirn/managers/redact.py +26 -0
  413. pirn/managers/status_event.py +19 -0
  414. pirn/managers/status_manager.py +59 -0
  415. pirn/nodes/AGENTIC_USE.md +186 -0
  416. pirn/nodes/__init__.py +14 -0
  417. pirn/nodes/aggregator.py +125 -0
  418. pirn/nodes/branch/__init__.py +0 -0
  419. pirn/nodes/branch/_branch_not_selected.py +5 -0
  420. pirn/nodes/branch/branch.py +142 -0
  421. pirn/nodes/branch/branch_output.py +77 -0
  422. pirn/nodes/continuation.py +201 -0
  423. pirn/nodes/gate/__init__.py +0 -0
  424. pirn/nodes/gate/_gate_closed.py +5 -0
  425. pirn/nodes/gate/gate.py +104 -0
  426. pirn/nodes/loop_sub_tapestry.py +276 -0
  427. pirn/nodes/map_markers.py +112 -0
  428. pirn/nodes/reduce_.py +154 -0
  429. pirn/nodes/sink.py +66 -0
  430. pirn/nodes/source.py +68 -0
  431. pirn/nodes/sub_tapestry.py +249 -0
  432. pirn/replay.py +156 -0
  433. pirn/streaming/AGENTIC_USE.md +125 -0
  434. pirn/streaming/__init__.py +20 -0
  435. pirn/streaming/base.py +103 -0
  436. pirn/streaming/file_tail.py +75 -0
  437. pirn/streaming/iterable.py +48 -0
  438. pirn/streaming/kafka.py +93 -0
  439. pirn/streaming/trigger_adapter.py +62 -0
  440. pirn/tapestry.py +319 -0
  441. pirn/triggers/AGENTIC_USE.md +136 -0
  442. pirn/triggers/__init__.py +11 -0
  443. pirn/triggers/base.py +91 -0
  444. pirn/triggers/cron.py +150 -0
  445. pirn/triggers/http.py +238 -0
  446. pirn/triggers/kafka.py +138 -0
  447. pirn/triggers/valkey.py +132 -0
  448. pirn/viz/__init__.py +11 -0
  449. pirn/viz/_explore_cli.py +64 -0
  450. pirn/viz/_scanner.py +375 -0
  451. pirn/viz/_tapestry_graph.py +26 -0
  452. pirn/viz/explorer.py +1541 -0
  453. pirn/viz/html.py +427 -0
  454. pirn/viz/mermaid.py +149 -0
  455. pirn/yaml_loader/__init__.py +6 -0
  456. pirn/yaml_loader/loader.py +444 -0
  457. pirn/yaml_loader/specs/__init__.py +0 -0
  458. pirn/yaml_loader/specs/aggregator_spec.py +15 -0
  459. pirn/yaml_loader/specs/branch_spec.py +14 -0
  460. pirn/yaml_loader/specs/gate_spec.py +11 -0
  461. pirn/yaml_loader/specs/knot_spec.py +14 -0
  462. pirn/yaml_loader/specs/map_spec.py +15 -0
  463. pirn/yaml_loader/specs/node_spec.py +15 -0
  464. pirn/yaml_loader/specs/pipeline_spec.py +45 -0
  465. pirn/yaml_loader/specs/reduce_spec.py +15 -0
  466. pirn/yaml_loader/specs/sink_spec.py +14 -0
  467. pirn/yaml_loader/specs/source_spec.py +15 -0
  468. pirn/yaml_loader/specs/yaml_parameter_spec.py +17 -0
  469. pirn_core-0.4.0.dist-info/METADATA +352 -0
  470. pirn_core-0.4.0.dist-info/RECORD +472 -0
  471. pirn_core-0.4.0.dist-info/WHEEL +4 -0
  472. pirn_core-0.4.0.dist-info/entry_points.txt +4 -0
pirn/__init__.py ADDED
@@ -0,0 +1,92 @@
1
+ """pirn — a pipeline framework where everything is a knot.
2
+
3
+ Knot discovery
4
+ --------------
5
+ At import time, pirn calls :meth:`sweet_tea.registry.Registry.fill_registry`
6
+ over its own package tree. Every :class:`pirn.core.knot.Knot` subclass shipped
7
+ with pirn is auto-registered under ``library="pirn"`` with the lowercase
8
+ class name as its registry key (CamelCase, snake_case, and no-underscore
9
+ variations all resolve to the same entry through sweet_tea's
10
+ :meth:`BaseFactory._generate_key_variations`).
11
+
12
+ This means YAML pipelines can reference any built-in pirn knot by name
13
+ without ``import`` boilerplate::
14
+
15
+ nodes:
16
+ - id: read
17
+ callable: object_store_read_source
18
+
19
+ YAML name resolution goes through
20
+ :class:`sweet_tea.abstract_inverter_factory.AbstractInverterFactory[Knot]`
21
+ — sweet_tea's typed factory that returns the class definition (rather than
22
+ instantiating it), so the loader can supply construction kwargs later.
23
+
24
+ User projects: register your own knots
25
+ --------------------------------------
26
+ If you define your own :class:`Knot` subclasses outside the pirn package
27
+ (e.g. ``my_company.transforms.NormaliseAddresses``), call
28
+ :meth:`Registry.fill_registry` from **your** project's package init so your
29
+ classes are auto-discovered too::
30
+
31
+ # my_company/__init__.py
32
+ from sweet_tea.registry import Registry
33
+
34
+ Registry.fill_registry() # scans my_company/ and registers every class
35
+
36
+ After that, your knots are resolvable by name from YAML pipelines just like
37
+ pirn's built-ins. To restrict resolution to your library only, look up via
38
+ ``AbstractInverterFactory[Knot].create(name, library="my_company")``.
39
+ """
40
+
41
+ import warnings
42
+ from importlib.metadata import PackageNotFoundError, version
43
+
44
+ from sweet_tea.registry import Registry
45
+ from sweet_tea.sweet_tea_warning import SweetTeaWarning
46
+
47
+ with warnings.catch_warnings():
48
+ warnings.simplefilter("ignore", SweetTeaWarning)
49
+ Registry.fill_registry()
50
+
51
+ try:
52
+ # Core ships as the ``pirn-core`` distribution but imports as ``pirn``.
53
+ __version__ = version("pirn-core")
54
+ except PackageNotFoundError:
55
+ __version__ = "unknown"
56
+
57
+ # Public API re-exports — users may import from pirn directly.
58
+ # Registry.fill_registry() above must run first; noqa: E402 suppresses the
59
+ # "import not at top of file" warnings that follow from that ordering.
60
+ from pirn._domain_discovery import discover_installed_domains
61
+ from pirn.core.assembler import Assembler
62
+ from pirn.core.disassembler import Disassembler
63
+ from pirn.core.error_policy import ErrorPolicy
64
+ from pirn.core.knot import Knot
65
+ from pirn.core.knot_config import KnotConfig
66
+ from pirn.core.knot_factory import knot
67
+ from pirn.core.parameter import Parameter
68
+ from pirn.core.run_request import RunRequest
69
+ from pirn.core.run_result import RunResult
70
+ from pirn.nodes.loop_sub_tapestry import LoopSubTapestry
71
+ from pirn.nodes.sink import Sink
72
+ from pirn.nodes.source import Source
73
+ from pirn.nodes.sub_tapestry import SubTapestry
74
+ from pirn.tapestry import Tapestry
75
+
76
+ __all__ = [
77
+ "Tapestry",
78
+ "Knot",
79
+ "KnotConfig",
80
+ "knot",
81
+ "Parameter",
82
+ "RunRequest",
83
+ "RunResult",
84
+ "ErrorPolicy",
85
+ "Assembler",
86
+ "Disassembler",
87
+ "Sink",
88
+ "Source",
89
+ "SubTapestry",
90
+ "LoopSubTapestry",
91
+ "discover_installed_domains",
92
+ ]
@@ -0,0 +1,83 @@
1
+ """Discover and import installed pirn domain packages.
2
+
3
+ Domains ship as standalone distributions (``pirn-signal``, ``pirn-data`` …)
4
+ whose import packages are ``pirn_signal``, ``pirn_data`` … Importing each one
5
+ triggers its ``Registry.fill_registry()`` self-registration, making its knots
6
+ resolvable by bare name through sweet_tea's factory. This module finds which
7
+ domain distributions are installed and imports the corresponding packages.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import importlib
13
+ from importlib.metadata import distributions
14
+ from importlib.util import find_spec
15
+
16
+
17
class _DomainDiscovery:
    """Locate installed pirn domain import-packages and import them.

    The six domain names are fixed framework data, kept as an instance
    attribute rather than a module constant. ``pirn`` (core) is intentionally
    excluded — it self-registers on its own import.
    """

    def __init__(self) -> None:
        # Domain suffixes; each maps to distribution ``pirn-<x>`` and import
        # package ``pirn_<x>``.
        self._domains: tuple[str, ...] = (
            "agents",
            "data",
            "health",
            "ml",
            "oilgas",
            "signal",
        )

    def installed_import_names(self) -> tuple[str, ...]:
        """Return the ``pirn_<x>`` import names whose distribution is installed.

        Introspects installed distributions via
        :func:`importlib.metadata.distributions`, matching the canonical
        ``pirn-<domain>`` distribution names, then keeps only those whose
        import package is actually resolvable on ``sys.path``.

        Distribution names are compared in PEP 503 normalised form (runs of
        ``-``, ``_`` and ``.`` collapse to a single ``-``, lowercased), so
        ``Pirn_Signal`` and ``pirn.signal`` both match ``pirn-signal``.
        Distributions whose metadata carries no ``Name`` field are skipped.

        Returns:
            The matching import names as a sorted tuple (possibly empty).
        """
        # Function-scope import keeps this block self-contained; ``re`` is
        # only needed for name normalisation.
        import re

        wanted = {f"pirn-{domain}": f"pirn_{domain}" for domain in self._domains}
        found: set[str] = set()
        for dist in distributions():
            metadata = dist.metadata
            # ``metadata["Name"]`` on a missing key is deprecated (it warns
            # today and is slated to raise ``KeyError``); ``.get`` skips a
            # broken distribution quietly instead of failing discovery.
            dist_name = metadata.get("Name") if metadata is not None else None
            if not dist_name:
                continue
            normalized = re.sub(r"[-_.]+", "-", dist_name).lower()
            import_name = wanted.get(normalized)
            if import_name is None or import_name in found:
                # Not a pirn domain, or already confirmed via a duplicate
                # distribution entry — no need to probe ``sys.path`` again.
                continue
            if find_spec(import_name) is not None:
                found.add(import_name)
        return tuple(sorted(found))

    def discover(self) -> tuple[str, ...]:
        """Import every installed domain package; return what was imported.

        Idempotent — re-importing an already-imported module is a no-op.
        Genuine import errors are not swallowed: they propagate wrapped in an
        :class:`ImportError` that names the offending package for context.
        The wrapper keeps the original error's ``name`` and ``path``
        attributes and chains the original as ``__cause__``.

        Returns:
            The imported package names, in the order produced by
            :meth:`installed_import_names`.

        Raises:
            ImportError: If importing any discovered domain package fails.
        """
        imported: list[str] = []
        for import_name in self.installed_import_names():
            try:
                importlib.import_module(import_name)
            except ImportError as exc:
                raise ImportError(
                    f"failed to import discovered pirn domain {import_name!r}: {exc}",
                    name=exc.name,
                    path=exc.path,
                ) from exc
            imported.append(import_name)
        return tuple(imported)
72
+
73
+
74
def discover_installed_domains() -> tuple[str, ...]:
    """Import every installed pirn domain package and report which ones.

    Importing a ``pirn_<x>`` package makes it self-register its knots via
    ``Registry.fill_registry()``; once this returns, those knots resolve by
    bare name through sweet_tea's factory (the same path the YAML loader
    uses).

    Returns:
        The sorted tuple of import names that were imported. Calling this
        repeatedly is safe.
    """
    discovery = _DomainDiscovery()
    imported_names = discovery.discover()
    return imported_names
@@ -0,0 +1,13 @@
1
+ """Import-compatibility codemod for the pirn monolith split (SCD-17).
2
+
3
+ When pirn was a monolith the six domains lived under ``pirn.domains.<x>``.
4
+ They are now standalone packages that import as ``pirn_<x>`` (for x in
5
+ signal, oilgas, data, ml, agents, health). This package ships a reusable,
6
+ idempotent, deterministic line-based rewriter that updates consumer source
7
+ from the old ``pirn.domains.<x>`` spellings to the new ``pirn_<x>`` ones.
8
+
9
+ It is exposed to end users as the ``pirn-migrate-imports`` console script
10
+ (see ``pirn._migrate.main``) and reused as the migration tool in SCD-23.
11
+ """
12
+
13
+ from __future__ import annotations
@@ -0,0 +1,110 @@
1
+ """Line-based rewriter for the ``pirn.domains.<x>`` -> ``pirn_<x>`` split.
2
+
3
+ See :mod:`pirn._migrate` for background. The rewriter is a pure text
4
+ transform: it operates line by line on ``.py`` source, only ever touching
5
+ ``import`` / ``from ... import`` statements that reference one of the six
6
+ known domains, and otherwise preserves formatting, indentation and comments
7
+ byte-for-byte. It is idempotent (a rewritten file is a fixed point) and
8
+ deterministic (identical input always yields identical output).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from pathlib import Path
15
+
16
+
17
class ImportRewriter:
    """Rewrites legacy ``pirn.domains.<x>`` imports to ``pirn_<x>``.

    The set of domains is fixed framework data (the monolith carved out
    exactly these six packages), so it is stored as a lowercase class
    attribute rather than a configurable constant. Only these names are
    ever rewritten; any other ``pirn.domains.<other>`` reference (e.g.
    ``pirn.domains.extras_loader``) is left untouched.

    Three single-line statement shapes are recognised:

    * ``from pirn.domains import <x>[ as alias]``
      -> ``import pirn_<x> as <x>`` (or ``as alias``), so the name the
      consumer's code refers to stays bound;
    * ``from pirn.domains.<x>[.sub...] import ...``
      -> ``from pirn_<x>[.sub...] import ...``;
    * ``import pirn.domains.<x>[.sub...][ as alias]``
      -> ``import pirn_<x>[.sub...][ as alias]``.

    Trailing whitespace, a trailing ``#`` comment and the original line
    terminator are carried over verbatim. Multi-name imports
    (``from pirn.domains import signal, ml``) and dotted *usages* outside
    import statements are not rewritten.
    """

    _domains: tuple[str, ...] = (
        "signal",
        "oilgas",
        "data",
        "ml",
        "agents",
        "health",
    )

    def __init__(self) -> None:
        """Compile the three import-statement patterns once per instance."""
        domain_alt = "|".join(self._domains)
        # The captured domain must not be followed by another identifier
        # character, so a non-domain like `pirn.domains.datasource` cannot
        # match `data`.
        boundary = r"(?![A-Za-z0-9_])"
        name = r"[A-Za-z_][A-Za-z0-9_]*"
        # Everything allowed after the statement proper: optional whitespace,
        # an optional `# comment`, and the line terminator. It is captured so
        # it can be re-emitted unchanged (keeps CRLF / missing final newline).
        trailer = r"(?P<trailer>\s*(?:#.*\s*)?)$"

        # `from pirn.domains import <x>[ as alias]`.
        # Only handled when the imported name is a single bare domain.
        self._from_domains_import = re.compile(
            rf"^(?P<indent>\s*)from\s+pirn\.domains\s+import\s+"
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<alias>\s+as\s+{name})?{trailer}"
        )

        # `from pirn.domains.<x>[.sub...] import ...`. The imported names are
        # not parsed; whatever follows `import ` is copied through as-is.
        self._from_submodule = re.compile(
            rf"^(?P<indent>\s*)from\s+pirn\.domains\."
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<tail>(?:\.{name})*)\s+import\s"
        )

        # `import pirn.domains.<x>[.sub...][ as alias]`.
        self._import_module = re.compile(
            rf"^(?P<indent>\s*)import\s+pirn\.domains\."
            rf"(?P<domain>{domain_alt}){boundary}"
            rf"(?P<tail>(?:\.{name})*)"
            rf"(?P<alias>\s+as\s+{name})?{trailer}"
        )

    def rewrite_line(self, line: str) -> str:
        """Rewrite a single source line, returning it unchanged if no rule applies.

        Args:
            line: One physical source line, with or without its terminator.

        Returns:
            The rewritten line, ending exactly as ``line`` ended.
        """
        match = self._from_domains_import.match(line)
        if match is not None:
            domain = match.group("domain")
            # Without an explicit alias the original statement bound the bare
            # domain name; alias the new package to it so later references
            # such as `signal.foo` keep resolving.
            alias = match.group("alias") or f" as {domain}"
            return (
                f"{match.group('indent')}import pirn_{domain}"
                f"{alias}{match.group('trailer')}"
            )

        match = self._from_submodule.match(line)
        if match is not None:
            head = (
                f"{match.group('indent')}from pirn_{match.group('domain')}"
                f"{match.group('tail')} import "
            )
            # Everything after `import ` (names, parentheses, comments, line
            # terminator) is preserved untouched.
            return head + line[match.end():]

        match = self._import_module.match(line)
        if match is not None:
            return (
                f"{match.group('indent')}import pirn_{match.group('domain')}"
                f"{match.group('tail')}{match.group('alias') or ''}"
                f"{match.group('trailer')}"
            )

        return line

    def rewrite_text(self, source: str) -> str:
        """Rewrite every applicable import line in a source string.

        Args:
            source: Full text of a Python source file.

        Returns:
            The text with legacy imports rewritten; the very same string
            object when it contains no ``pirn.domains`` reference at all.
        """
        # Cheap pre-filter. It must not be stricter than the patterns, which
        # accept any whitespace (e.g. a tab) after `pirn.domains`.
        if "pirn.domains" not in source:
            return source
        lines = source.splitlines(keepends=True)
        return "".join(self.rewrite_line(line) for line in lines)

    @staticmethod
    def _read_source(path: Path) -> str:
        """Read ``path`` as UTF-8 without any newline translation."""
        return path.read_bytes().decode("utf-8")

    def rewrite_file(self, path: Path) -> bool:
        """Rewrite a file in place. Returns ``True`` iff its contents changed.

        The file is read and written as raw UTF-8 bytes so that untouched
        lines, including their ``\\r\\n`` or ``\\n`` terminators, survive
        byte-for-byte. The file is only written when something changed.

        Raises:
            OSError: If the file cannot be read or written.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        original = self._read_source(path)
        rewritten = self.rewrite_text(original)
        if rewritten == original:
            return False
        path.write_bytes(rewritten.encode("utf-8"))
        return True

    def file_needs_rewrite(self, path: Path) -> bool:
        """Return ``True`` iff the file would change, without writing it.

        Raises:
            OSError: If the file cannot be read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        original = self._read_source(path)
        return self.rewrite_text(original) != original
pirn/_migrate/main.py ADDED
@@ -0,0 +1,76 @@
1
+ """CLI for the ``pirn.domains.<x>`` -> ``pirn_<x>`` import codemod (SCD-17).
2
+
3
+ Exposed as the ``pirn-migrate-imports`` console script. Accepts one or more
4
+ files or directories. Directories are walked recursively for ``.py`` files.
5
+ By default rewrites in place; ``--check`` is a dry run that reports what
6
+ would change and exits non-zero if any file needs rewriting. Output is a
7
+ deterministic, sorted summary.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+ from pathlib import Path
15
+
16
+ from pirn._migrate.import_rewriter import ImportRewriter
17
+
18
+
19
def _collect_py_files(paths: list[str]) -> list[Path]:
    """Return the sorted, de-duplicated ``.py`` files named by or under ``paths``.

    Args:
        paths: File or directory paths. Directories are searched recursively
            for ``*.py``; a non-directory is kept only when it has a ``.py``
            suffix. Anything that is not an existing regular file is dropped.

    Returns:
        The unique matching paths in sorted order.
    """
    unique: set[Path] = set()
    for entry in map(Path, paths):
        if entry.is_dir():
            candidates = entry.rglob("*.py")
        elif entry.suffix == ".py":
            candidates = [entry]
        else:
            continue
        # Applies to both branches: only real files make it into the result.
        unique.update(candidate for candidate in candidates if candidate.is_file())
    return sorted(unique)
29
+
30
+
31
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``pirn-migrate-imports`` console script.

    Args:
        argv: Command-line arguments without the program name; ``None``
            lets :mod:`argparse` read ``sys.argv``.

    Returns:
        ``1`` when ``--check`` is given and at least one file needs
        rewriting, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(
        prog="pirn-migrate-imports",
        description=(
            "Rewrite legacy `pirn.domains.<x>` imports to the standalone "
            "`pirn_<x>` packages (x in signal, oilgas, data, ml, agents, health)."
        ),
    )
    parser.add_argument(
        "paths",
        nargs="+",
        help="Files or directories to rewrite (directories are walked recursively).",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Dry run: report files that need rewriting and exit non-zero, writing nothing.",
    )
    options = parser.parse_args(argv)
    dry_run = options.check

    rewriter = ImportRewriter()
    candidates = _collect_py_files(options.paths)

    # Choose the per-file operation once: probe only (dry run) or rewrite.
    touches = rewriter.file_needs_rewrite if dry_run else rewriter.rewrite_file
    changed = [path for path in candidates if touches(path)]

    if not changed:
        print(f"no changes — scanned {len(candidates)} file(s)")
        return 0

    verb = "would rewrite" if dry_run else "rewrote"
    for path in changed:
        print(f"{verb}: {path}")
    print(f"{verb} {len(changed)} of {len(candidates)} file(s)")
    return 1 if dry_run else 0
73
+
74
+
75
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
@@ -0,0 +1,168 @@
1
+ `pirn.backends` provides pluggable implementations of the three storage protocols (`TapestryStore`, `RunHistory`, `DataStore`) and the cloud object store base — it does not execute pipelines or process domain data.
2
+
3
+ ---
4
+
5
+ ## Mental model
6
+
7
+ Every `Tapestry` holds three backend slots, each independently swappable:
8
+
9
+ | Slot | Protocol | Stores | Default |
10
+ |------|----------|--------|---------|
11
+ | `store` | `TapestryStore` | Knot registrations (the pipeline definition) | `InMemoryStore` |
12
+ | `history` | `RunHistory` | Run results and per-knot lineage records | `InMemoryHistory` |
13
+ | `data` | `DataStore` | Intermediate values keyed by content hash | `InMemoryDataStore` |
14
+
15
+ Pass backends to `Tapestry(store=..., history=..., data=...)`. Backends that are not passed default to their in-memory counterparts. The three slots are decoupled — you can persist lineage to SQLite while keeping values in memory.
16
+
17
+ `SubscribableStore` is a mixin implemented by `InMemoryStore`, `PostgresStore`, and `ValKeyStore`. It adds `subscribe()` for live notifications when knots are registered — required for `WithContinuation` and extensible runs.
18
+
19
+ ---
20
+
21
+ ## Source map
22
+
23
+ ```
24
+ pirn/backends/
25
+ ├── base/
26
+ │ ├── tapestry_store.py TapestryStore — interface: register, get, all, snapshot
27
+ │ ├── run_history.py RunHistory — interface: record_run, get_run, query_lineage_*
28
+ │ ├── data_store.py DataStore — interface: put, get, has, delete
29
+ │ ├── subscribable_store.py SubscribableStore — mixin: subscribe() for live registration events
30
+ │ └── tapestry_snapshot.py TapestrySnapshot — frozen Pydantic model: ordered knot id list
31
+ ├── in_memory/
32
+ │ ├── in_memory_store.py InMemoryStore — TapestryStore + SubscribableStore; default
33
+ │ ├── in_memory_history.py InMemoryHistory — RunHistory; default; not persistent
34
+ │ └── in_memory_data_store.py InMemoryDataStore — DataStore; default; not persistent
35
+ ├── sqlite/
36
+ │ ├── sqlite_store.py SQLiteStore — TapestryStore backed by SQLite
37
+ │ └── sqlite_history.py SQLiteHistory — RunHistory backed by SQLite; durable
38
+ ├── postgres/
39
+ │ ├── postgres_store.py PostgresStore — TapestryStore + SubscribableStore; asyncpg
40
+ │ └── postgres_history.py PostgresHistory — RunHistory backed by Postgres; durable
41
+ ├── valkey/
42
+ │ ├── valkey_store.py ValKeyStore — TapestryStore + SubscribableStore; Valkey/Redis
43
+ │ └── valkey_data_store.py ValKeyDataStore — DataStore backed by Valkey/Redis; pickle-serialised
44
+ ├── duckdb.py DuckDBHistory — RunHistory backed by DuckDB; analytical queries
45
+ ├── s3.py S3DataStore — DataStore backed by AWS S3; pickle-serialised
46
+ ├── gcs.py GCSDataStore — DataStore backed by Google Cloud Storage
47
+ ├── azure.py AzureBlobDataStore — DataStore backed by Azure Blob Storage
48
+ └── disk.py LocalDiskDataStore — DataStore backed by local filesystem; pickle-serialised
49
+ ```
50
+
51
+ ---
52
+
53
+ ## Canonical pattern
54
+
55
+ ### Development — all in memory (default)
56
+
57
+ ```python
58
+ from pirn import Tapestry, RunRequest
59
+
60
+ # No backends passed — all three slots use in-memory defaults.
61
+ with Tapestry() as t:
62
+ ...
63
+
64
+ result = await t.run(RunRequest())
65
+ ```
66
+
67
+ ### Production — durable lineage, in-memory values
68
+
69
+ ```python
70
+ from pirn import Tapestry, RunRequest
71
+ from pirn.backends.sqlite.sqlite_history import SQLiteHistory
72
+
73
+ history = SQLiteHistory(path="pirn.db")
74
+
75
+ with Tapestry(history=history) as t:
76
+ ...
77
+
78
+ result = await t.run(RunRequest())
79
+ # result.lineage is now persisted across process restarts
80
+ ```
81
+
82
+ ### Querying lineage across runs
83
+
84
+ ```python
85
+ records = await history.query_lineage_by_knot_id("my-knot-id")
86
+ for rec in records:
87
+ print(rec.run_id, rec.outcome, rec.output_hash)
88
+ ```
89
+
90
+ ### Persisting intermediate values (S3)
91
+
92
+ ```python
93
+ from pirn.backends.s3 import S3DataStore
94
+
95
+ data = S3DataStore(bucket="my-pirn-bucket", prefix="runs/")
96
+ with Tapestry(data=data) as t:
97
+ ...
98
+ ```
99
+
100
+ ### Shared tapestry definition (Postgres — multi-process)
101
+
102
+ ```python
103
+ from pirn.backends.postgres.postgres_store import PostgresStore
104
+ from pirn.backends.postgres.postgres_history import PostgresHistory
105
+
106
+ store = PostgresStore(dsn="postgresql://user:pass@host/pirn")
107
+ history = PostgresHistory(dsn="postgresql://user:pass@host/pirn")
108
+
109
+ with Tapestry(store=store, history=history) as t:
110
+ ...
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Anti-patterns
116
+
117
+ ### Using cloud DataStores with untrusted infrastructure
118
+
119
+ `S3DataStore`, `GCSDataStore`, `AzureBlobDataStore`, `ValKeyDataStore`, and `LocalDiskDataStore` serialise values with `pickle`. An adversary who can write to the backing store can therefore execute arbitrary code when a value is deserialised. Only use these backends when the backing store is fully access-controlled.
120
+
121
+ ### Assuming `InMemoryHistory` persists across runs
122
+
123
+ `InMemoryHistory` holds results in a dict for the lifetime of the process. Restarting the process loses all lineage. Use `SQLiteHistory`, `PostgresHistory`, or `DuckDBHistory` for durability.
124
+
125
+ ### Using extensible runs with a non-subscribable `TapestryStore`
126
+
127
+ `tapestry.run(extensible=True)` (required by `WithContinuation` and `LoopSubTapestry`) calls `get_current_store()` mid-run to register new knots. Only `InMemoryStore`, `PostgresStore`, and `ValKeyStore` (all `SubscribableStore` implementors) support this. `SQLiteStore` does not.
128
+
129
+ ### Scrubbing `DataStore` values and expecting lineage to break
130
+
131
+ `DataStore` and `RunHistory` are decoupled by design. Deleting a value from the data store removes the payload but leaves the lineage hash record intact. This is intentional for GDPR-style scrubbing.
132
+
133
+ ---
134
+
135
+ ## Constraints and gotchas
136
+
137
+ - **`SQLiteHistory` runs migrations on first open.** The first `SQLiteHistory(path=...)` call creates the schema. Concurrent first-opens from multiple processes can race — initialise from a single process or use Postgres for multi-process deployments.
138
+ - **`DuckDBHistory` is optimised for analytical queries, not writes.** Use it for offline lineage analysis, not as the primary history backend of a high-throughput pipeline.
139
+ - **`ValKeyDataStore` and `LocalDiskDataStore` are pickle-based, like the cloud data stores.** See the pickle anti-pattern above.
140
+ - **`PostgresStore` and `ValKeyStore` implement `SubscribableStore`.** If you need extensible runs in a distributed deployment, these are the only backends that support it.
141
+ - **Backend constructors are synchronous; connections are lazy.** `PostgresStore(dsn=...)` does not open a connection immediately. The first operation opens it. Call `await backend.close()` when done.
142
+ - **`DataStore.has()` is a cheap existence check** — use it before `get()` when a miss is a valid path, rather than catching `KeyError`.
143
+
144
+ ---
145
+
146
+ ## Quick reference
147
+
148
+ | Task | How |
149
+ |------|-----|
150
+ | Default (dev, no persistence) | `Tapestry()` — all in-memory |
151
+ | Durable lineage (single process) | `Tapestry(history=SQLiteHistory(path="pirn.db"))` |
152
+ | Durable lineage (multi-process) | `Tapestry(history=PostgresHistory(dsn=...))` |
153
+ | Analytical lineage queries | `DuckDBHistory(path="lineage.duckdb")` |
154
+ | Persist intermediate values to S3 | `Tapestry(data=S3DataStore(bucket=..., prefix=...))` |
155
+ | Persist intermediate values to GCS | `Tapestry(data=GCSDataStore(bucket=..., prefix=...))` |
156
+ | Persist intermediate values to Azure | `Tapestry(data=AzureBlobDataStore(container=..., prefix=...))` |
157
+ | Persist intermediate values to disk | `Tapestry(data=LocalDiskDataStore(root=Path("/data")))` |
158
+ | Shared tapestry definition (multi-process) | `Tapestry(store=PostgresStore(dsn=...))` |
159
+ | Shared tapestry + extensible runs | `Tapestry(store=PostgresStore(dsn=...), ...)` — `SubscribableStore` required |
160
+ | Query lineage by knot id | `await history.query_lineage_by_knot_id("my-id")` |
161
+ | Query lineage by output hash | `await history.query_lineage_by_output_hash("sha256:abc...")` |
162
+ | Check if a value is cached | `await data_store.has("sha256:abc...")` |
163
+ | Retrieve a cached value | `await data_store.get("sha256:abc...")` |
164
+ | Scrub a value (GDPR) | `await data_store.delete("sha256:abc...")` — lineage record is preserved |
165
+
166
+ ---
167
+
168
+ *See also: [pirn AGENTIC_USE.md](../../AGENTIC_USE.md)*
@@ -0,0 +1,12 @@
1
+ """Backend implementations for pirn.
2
+
3
+ Interface base classes live in ``pirn.backends.base``.
4
+ Implementations:
5
+ - ``pirn.backends.in_memory`` — in-memory (default, single-process)
6
+ - ``pirn.backends.sqlite`` — SQLite (durable, single-host)
7
+ - ``pirn.backends.postgres`` — PostgreSQL via asyncpg
8
+ - ``pirn.backends.valkey`` — ValKey/Redis
9
+ - ``pirn.backends.duckdb`` — DuckDB (analytics history)
10
+ - ``pirn.backends.disk`` — local disk data store
11
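+ - ``pirn.backends.s3`` — S3 data store
+ - ``pirn.backends.gcs`` — Google Cloud Storage data store
+ - ``pirn.backends.azure`` — Azure Blob Storage data store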
12
+ """
@@ -0,0 +1,92 @@
1
+ """HMAC-SHA256 payload signing for DataStore backends.
2
+
3
+ Prevents insecure deserialization of tampered payloads (security finding C-1).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import base64
9
+ import hashlib
10
+ import hmac
11
+ import os
12
+
13
+
14
class _Signer:
    """Signs and verifies cloudpickle payloads with HMAC-SHA256.

    A signed payload is the 32-byte MAC followed by the raw payload bytes.
    The digest size is fixed at 32 bytes by the SHA256 algorithm.
    """

    __digest_size = 32

    def __init__(self, key: bytes) -> None:
        """Store ``key`` as the HMAC secret; no validation is done here."""
        self.__key = key

    @classmethod
    def from_env(cls, var: str = "PIRN_SIGNING_KEY") -> _Signer:
        """Build a signer from a base64-encoded key held in an environment variable.

        Args:
            var: Name of the environment variable to read.

        Raises:
            ValueError: If the variable is unset or empty, or if it decodes
                to fewer than 32 bytes.

        Example::

            import secrets, base64
            key_b64 = base64.b64encode(secrets.token_bytes(32)).decode()
            # Set PIRN_SIGNING_KEY=<key_b64> in your environment, then:
            store = LocalDiskDataStore("/data", signer=_Signer.from_env())
        """
        encoded = os.environ.get(var)
        if not encoded:
            raise ValueError(
                f"Environment variable {var!r} is not set or empty. "
                "Set it to a base64-encoded signing key before constructing a signed DataStore."
            )
        key = base64.b64decode(encoded)
        key_len = len(key)
        if key_len < 32:
            raise ValueError(
                f"Environment variable {var!r} decoded to {key_len} bytes; "
                "HMAC-SHA256 requires at least 32 bytes of key material. "
                'Generate a key with: python -c "import secrets,base64; print(base64.b64encode(secrets.token_bytes(32)).decode())"'
            )
        return cls(key)

    @classmethod
    def test_signer(cls) -> _Signer:
        """Return a deterministic signer for unit tests.

        Lets tests exercise the signing path without env-var setup or real
        key material. **Never use in production.** Production signers must
        come from :meth:`from_env` or be constructed manually with a
        per-deployment key.

        Raises:
            RuntimeError: If ``PIRN_ENV`` (compared case-insensitively) is
                neither ``"test"`` nor ``"ci"``.
        """
        environment = os.environ.get("PIRN_ENV", "").lower()
        if environment in ("test", "ci"):
            return cls(b"pirn-test-signer-key-not-for-production")
        raise RuntimeError(
            "_Signer.test_signer() must not be called in production. "
            "Set PIRN_ENV=test or PIRN_ENV=ci to use this method in a "
            "test or CI environment. Use _Signer.from_env() for production."
        )

    def __mac(self, data: bytes) -> bytes:
        """Return the HMAC-SHA256 digest of ``data`` under this signer's key."""
        return hmac.new(self.__key, data, hashlib.sha256).digest()

    def sign(self, payload: bytes) -> bytes:
        """Return ``payload`` prefixed with its 32-byte HMAC-SHA256 signature."""
        return self.__mac(payload) + payload

    def verify(self, payload: bytes) -> bytes:
        """Check the leading HMAC-SHA256 signature and return the raw payload.

        Raises:
            ValueError: If ``payload`` is shorter than a signature, or the
                signature does not match the remaining bytes.
        """
        size = self.__digest_size
        if len(payload) < size:
            raise ValueError("payload too short to contain a signature — possible tampering")
        claimed = payload[:size]
        body = payload[size:]
        # compare_digest keeps the comparison constant-time.
        if hmac.compare_digest(claimed, self.__mac(body)):
            return body
        raise ValueError("HMAC signature mismatch — payload may have been tampered with")