matrixone-python-sdk 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {matrixone_python_sdk-0.1.9/matrixone_python_sdk.egg-info → matrixone_python_sdk-0.1.11}/PKG-INFO +199 -1
  2. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/README.md +51 -0
  3. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/README_USER.md +198 -0
  4. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_24_query_update.py +23 -6
  5. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_26_stage_operations.py +6 -6
  6. matrixone_python_sdk-0.1.11/examples/example_31_cdc_operations.py +398 -0
  7. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/__init__.py +6 -0
  8. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/async_client.py +9 -0
  9. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/base_client.py +17 -1
  10. matrixone_python_sdk-0.1.11/matrixone/cdc.py +983 -0
  11. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/cli_tools.py +1126 -51
  12. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/client.py +9 -0
  13. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/session.py +6 -0
  14. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/__init__.py +52 -0
  15. matrixone_python_sdk-0.1.11/matrixone/sqlalchemy_ext/json_functions.py +296 -0
  16. matrixone_python_sdk-0.1.11/matrixone/sqlalchemy_ext/json_type.py +261 -0
  17. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11/matrixone_python_sdk.egg-info}/PKG-INFO +199 -1
  18. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/SOURCES.txt +4 -0
  19. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/pyproject.toml +2 -2
  20. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/LICENSE +0 -0
  21. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/MANIFEST.in +0 -0
  22. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_01_basic_connection.py +0 -0
  23. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_02_account_management.py +0 -0
  24. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_03_async_operations.py +0 -0
  25. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_04_transaction_management.py +0 -0
  26. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_05_snapshot_restore.py +0 -0
  27. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_06_sqlalchemy_integration.py +0 -0
  28. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_07_advanced_features.py +0 -0
  29. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_08_pubsub_operations.py +0 -0
  30. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_09_logger_integration.py +0 -0
  31. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_10_version_management.py +0 -0
  32. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_11_matrixone_version_demo.py +0 -0
  33. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_12_vector_basics.py +0 -0
  34. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_13_vector_indexes.py +0 -0
  35. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_14_vector_search.py +0 -0
  36. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_15_vector_advanced.py +0 -0
  37. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_18_snapshot_orm.py +0 -0
  38. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_19_sqlalchemy_style_orm.py +0 -0
  39. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_20_sqlalchemy_engine_integration.py +0 -0
  40. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_21_advanced_orm_features.py +0 -0
  41. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_22_unified_sql_builder.py +0 -0
  42. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_23_load_data_operations.py +0 -0
  43. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_25_metadata_operations.py +0 -0
  44. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_27_export_operations.py +0 -0
  45. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_28_sqlalchemy_select.py +0 -0
  46. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_29_complex_queries.py +0 -0
  47. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_30_with_snapshot_method.py +0 -0
  48. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_connection_hooks.py +0 -0
  49. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_dynamic_logging.py +0 -0
  50. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/examples/example_ivf_stats_complete.py +0 -0
  51. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/account.py +0 -0
  52. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/async_orm.py +0 -0
  53. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/clone.py +0 -0
  54. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/config.py +0 -0
  55. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/connection_hooks.py +0 -0
  56. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/exceptions.py +0 -0
  57. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/export.py +0 -0
  58. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/fulltext_manager.py +0 -0
  59. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/index_utils.py +0 -0
  60. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/load_data.py +0 -0
  61. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/logger.py +0 -0
  62. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/metadata.py +0 -0
  63. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/moctl.py +0 -0
  64. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/orm.py +0 -0
  65. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/pitr.py +0 -0
  66. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/pubsub.py +0 -0
  67. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/restore.py +0 -0
  68. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/search_vector_index.py +0 -0
  69. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/snapshot.py +0 -0
  70. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sql_builder.py +0 -0
  71. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/dialect.py +0 -0
  72. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/fulltext_index.py +0 -0
  73. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/fulltext_search.py +0 -0
  74. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/hnsw_config.py +0 -0
  75. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/ivf_config.py +0 -0
  76. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/snapshot.py +0 -0
  77. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/table_builder.py +0 -0
  78. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/vector_index.py +0 -0
  79. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_ext/vector_type.py +0 -0
  80. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/sqlalchemy_select.py +0 -0
  81. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/stage.py +0 -0
  82. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/vector_manager.py +0 -0
  83. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone/version.py +0 -0
  84. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/dependency_links.txt +0 -0
  85. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/entry_points.txt +0 -0
  86. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/not-zip-safe +0 -0
  87. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/requires.txt +0 -0
  88. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/matrixone_python_sdk.egg-info/top_level.txt +0 -0
  89. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/mo_diag.py +0 -0
  90. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/requirements.txt +0 -0
  91. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/setup.cfg +0 -0
  92. {matrixone_python_sdk-0.1.9 → matrixone_python_sdk-0.1.11}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matrixone-python-sdk
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: A comprehensive Python SDK for MatrixOne database operations with vector search, fulltext search, and advanced features
5
5
  Home-page: https://github.com/matrixorigin/matrixone
6
6
  Author: MatrixOne Team
@@ -106,6 +106,7 @@ A comprehensive, high-level Python SDK for MatrixOne that provides SQLAlchemy-li
106
106
  - Export to local files or external stages (``stage://`` protocol)
107
107
  - Support for raw SQL, SQLAlchemy, and MatrixOne queries
108
108
  - Transaction-aware exports for consistency
109
+ - 🔁 **Change Data Capture (CDC)**: Create, monitor, and control CDC tasks directly from Python (`client.cdc`, `async_client.cdc`)
109
110
  - 📸 **Snapshot Management**: Create and manage database snapshots at multiple levels
110
111
  - ⏰ **Point-in-Time Recovery**: PITR functionality for precise data recovery
111
112
  - 🔄 **Table Cloning**: Clone databases and tables efficiently
@@ -171,6 +172,37 @@ conda activate matrixone
171
172
  pip install matrixone-python-sdk
172
173
  ```
173
174
 
175
+ ## ⚠️ Important: Column Naming Convention
176
+
177
+ **🚨 CRITICAL: Always use lowercase with underscores (snake_case) for column names!**
178
+
179
+ MatrixOne does not support SQL standard double-quoted identifiers in queries, which causes issues with camelCase column names when using SQLAlchemy ORM.
180
+
181
+ ```python
182
+ # ❌ DON'T: camelCase column names (will fail in SELECT queries)
183
+ class User(Base):
184
+ userName = Column(String(50)) # CREATE succeeds, SELECT fails!
185
+ userId = Column(Integer) # Will cause SQL syntax errors
186
+
187
+ # ✅ DO: Use lowercase with underscores (snake_case)
188
+ class User(Base):
189
+ user_name = Column(String(50)) # Works perfectly
190
+ user_id = Column(Integer) # All operations succeed
191
+ ```
192
+
193
+ **Why this matters:**
194
+ - ✅ CREATE TABLE works with both styles (uses backticks)
195
+ - ✅ INSERT works with both styles
196
+ - ❌ **SELECT fails with camelCase** (uses double quotes, not supported by MatrixOne)
197
+
198
+ **Example of the problem:**
199
+ ```python
200
+ # camelCase generates: SELECT "userName" FROM user ❌ Fails!
201
+ # snake_case generates: SELECT user_name FROM user ✅ Works!
202
+ ```
203
+
204
+ ---
205
+
174
206
  ## Quick Start
175
207
 
176
208
  ### Basic Usage
@@ -317,6 +349,152 @@ client.export.to_csv_stage('s3_stage', 'backup2.csv', stmt)
317
349
  client.disconnect()
318
350
  ```
319
351
 
352
+ ### CDC Task Management
353
+
354
+ Create and control Change Data Capture (CDC) tasks directly from the Python SDK:
355
+
356
+ ```python
357
+ from matrixone import Client, build_mysql_uri
358
+
359
+ client = Client()
360
+ client.connect(database='test')
361
+
362
+ source_uri = build_mysql_uri('127.0.0.1', 6001, user='admin', password='111', account='acct')
363
+ mysql_sink = build_mysql_uri('192.168.1.100', 3306, user='root', password='111')
364
+
365
+ # Database-level replication (automatically ensures Level='database')
366
+ client.cdc.create_database_task(
367
+ task_name='replicate_sales',
368
+ source_uri=source_uri,
369
+ sink_type='mysql',
370
+ sink_uri=mysql_sink,
371
+ source_database='sales',
372
+ sink_database='sales_archive',
373
+ options={'Frequency': '30m'}
374
+ )
375
+
376
+ # Table-level replication helper (list of tuples/dicts/strings)
377
+ client.cdc.create_table_task(
378
+ task_name='replicate_key_tables',
379
+ source_uri=source_uri,
380
+ sink_type='mysql',
381
+ sink_uri=mysql_sink,
382
+ table_mappings=[('sales', 'orders', 'sales_archive', 'orders_backup'), 'sales.customers:sales_archive.customers']
383
+ )
384
+
385
+ # Pause/resume lifecycle
386
+ client.cdc.pause('replicate_sales')
387
+ client.cdc.resume('replicate_sales')
388
+
389
+ # Inspect status and per-table watermarks
390
+ tasks = client.cdc.list()
391
+ watermarks = client.cdc.list_watermarks('replicate_sales')
392
+
393
+ # Drop task when no longer needed
394
+ client.cdc.drop('replicate_sales')
395
+ client.cdc.drop('replicate_key_tables')
396
+
397
+ client.disconnect()
398
+ ```
399
+
400
+ > Async usage: call the same helpers via `await async_client.cdc.create_database_task(...)` / `create_table_task(...)` or inside `async with client.session()`.
401
+
402
+ ### CDC API Overview
403
+
404
+ The `matrixone.cdc.CDCManager` class provides high-level helpers for all CDC lifecycle operations. A typical workflow looks like this:
405
+
406
+ ```python
407
+ from matrixone import Client
408
+ from matrixone.cdc import CDCManager
409
+
410
+ client = Client()
411
+ client.connect(host='127.0.0.1', port=6001, user='root', password='111', database='test')
412
+
413
+ cdc = CDCManager(client)
414
+
415
+ # Create a table-level task
416
+ task = cdc.create_table_task(
417
+ task_name='orders_sync',
418
+ source_uri='mysql://sys#root:111@127.0.0.1:6001',
419
+ sink_type='matrixone',
420
+ sink_uri='mysql://sys#root:111@127.0.0.1:6001',
421
+ table_mappings=[('sales', 'orders', 'backup', 'orders')],
422
+ options={'Frequency': '1h', 'NoFull': True},
423
+ )
424
+
425
+ # Pause and resume
426
+ cdc.pause(task.task_name)
427
+ cdc.resume(task.task_name)
428
+
429
+ # Inspect metadata
430
+ info = cdc.get(task.task_name)
431
+ print(info.state)
432
+
433
+ # Clean up
434
+ cdc.drop(task.task_name)
435
+ client.disconnect()
436
+ ```
437
+
438
+ Key helper methods:
439
+
440
+ * `create()` – create a CDC task from a raw mapping string.
441
+ * `create_database_task()` – convenience wrapper for database-level replication.
442
+ * `create_table_task()` – convenience wrapper for table-level replication using Python data structures.
443
+ * `pause()`/`resume()`/`restart()` – control CDC task state.
444
+ * `list()`/`get()` – retrieve CDC task metadata as `matrixone.cdc.CDCTaskInfo` objects.
445
+ * `list_watermarks()` – read per-table watermarks as `matrixone.cdc.CDCWatermarkInfo` objects.
446
+ * `list_failing_tasks()` – return tasks whose `err_msg` field is populated.
447
+ * `list_stuck_tasks()` – highlight running tasks that have per-table errors.
448
+ * `list_late_table_watermarks()` – detect tables whose watermarks lag behind expected thresholds (supports custom per-task/per-table overrides).
449
+
450
+ Refer to the `matrixone.cdc.CDCManager` docstrings or `examples/example_31_cdc_operations.py` for a comprehensive lifecycle example.
451
+
452
+ ```python
453
+ from matrixone import Client, build_mysql_uri
454
+
455
+ client = Client()
456
+ client.connect(database='test')
457
+
458
+ source_uri = build_mysql_uri('127.0.0.1', 6001, user='admin', password='111', account='acct')
459
+ mysql_sink = build_mysql_uri('192.168.1.100', 3306, user='root', password='111')
460
+
461
+ # Database-level replication (automatically ensures Level='database')
462
+ client.cdc.create_database_task(
463
+ task_name='replicate_sales',
464
+ source_uri=source_uri,
465
+ sink_type='mysql',
466
+ sink_uri=mysql_sink,
467
+ source_database='sales',
468
+ sink_database='sales_archive',
469
+ options={'Frequency': '30m'}
470
+ )
471
+
472
+ # Table-level replication helper (list of tuples/dicts/strings)
473
+ client.cdc.create_table_task(
474
+ task_name='replicate_key_tables',
475
+ source_uri=source_uri,
476
+ sink_type='mysql',
477
+ sink_uri=mysql_sink,
478
+ table_mappings=[('sales', 'orders', 'sales_archive', 'orders_backup'), 'sales.customers:sales_archive.customers']
479
+ )
480
+
481
+ # Pause/resume lifecycle
482
+ client.cdc.pause('replicate_sales')
483
+ client.cdc.resume('replicate_sales')
484
+
485
+ # Inspect status and per-table watermarks
486
+ tasks = client.cdc.list()
487
+ watermarks = client.cdc.list_watermarks('replicate_sales')
488
+
489
+ # Drop task when no longer needed
490
+ client.cdc.drop('replicate_sales')
491
+ client.cdc.drop('replicate_key_tables')
492
+
493
+ client.disconnect()
494
+ ```
495
+
496
+ > Async usage: call the same helpers via `await async_client.cdc.create_database_task(...)` / `create_table_task(...)` or inside `async with client.session()`.
497
+
320
498
  ### Wrapping Existing Sessions (For Legacy Code)
321
499
 
322
500
  If you have existing SQLAlchemy code, wrap your sessions to add MatrixOne features:
@@ -554,6 +732,26 @@ mo-diag -d test -c "sql SELECT COUNT(*) FROM my_table"
554
732
  mo-diag -d test -c "flush_table my_table"
555
733
  ```
556
734
 
735
+ #### CDC Task Shortcuts
736
+
737
+ CDC management is integrated with the CLI. The interactive commands (`cdc_tasks`,
738
+ `cdc_task`, `cdc_create`, `cdc_drop`) now have non-interactive equivalents
739
+ under `mo-diag cdc`, making automation straightforward.
740
+
741
+ ```bash
742
+ # Summaries (mirrors cdc_tasks)
743
+ mo-diag cdc show
744
+
745
+ # Inspect a task and tighten watermark thresholds
746
+ mo-diag cdc show nightly_sync --details --threshold=5m
747
+
748
+ # Start the guided creator in table mode
749
+ mo-diag cdc create --table-level
750
+
751
+ # Drop a task without interactive prompts
752
+ mo-diag cdc drop nightly_sync --force
753
+ ```
754
+
557
755
  ### Available Commands
558
756
 
559
757
  #### Index Management
@@ -70,6 +70,37 @@ A comprehensive Python SDK for MatrixOne that provides SQLAlchemy-like interface
70
70
  - Non-interactive mode for scripting and automation
71
71
  - Batch operations on tables and indexes
72
72
 
73
+ ## ⚠️ Important: Column Naming Convention
74
+
75
+ **🚨 CRITICAL: Always use lowercase with underscores (snake_case) for column names!**
76
+
77
+ MatrixOne does not support SQL standard double-quoted identifiers in queries, which causes issues with camelCase column names when using SQLAlchemy ORM.
78
+
79
+ ```python
80
+ # ❌ DON'T: camelCase column names (will fail in SELECT queries)
81
+ class User(Base):
82
+ userName = Column(String(50)) # CREATE succeeds, SELECT fails!
83
+ userId = Column(Integer) # Will cause SQL syntax errors
84
+
85
+ # ✅ DO: Use lowercase with underscores (snake_case)
86
+ class User(Base):
87
+ user_name = Column(String(50)) # Works perfectly
88
+ user_id = Column(Integer) # All operations succeed
89
+ ```
90
+
91
+ **Why this matters:**
92
+ - ✅ CREATE TABLE works with both styles (uses backticks)
93
+ - ✅ INSERT works with both styles
94
+ - ❌ **SELECT fails with camelCase** (uses double quotes, not supported by MatrixOne)
95
+
96
+ **Example of the problem:**
97
+ ```python
98
+ # camelCase generates: SELECT "userName" FROM user ❌ Fails!
99
+ # snake_case generates: SELECT user_name FROM user ✅ Works!
100
+ ```
101
+
102
+ ---
103
+
73
104
  ## 🚀 Installation
74
105
 
75
106
  ### From PyPI (Stable Release)
@@ -793,6 +824,26 @@ mo-diag -d test -c "sql SELECT COUNT(*) FROM my_table"
793
824
  mo-diag -d test -c "flush_table my_table"
794
825
  ```
795
826
 
827
+ #### CDC Task Shortcuts
828
+
829
+ Manage Change Data Capture directly from the shell or scripts. The interactive
830
+ commands (`cdc_tasks`, `cdc_task`, `cdc_create`, `cdc_drop`) now have matching
831
+ non-interactive helpers via `mo-diag cdc`.
832
+
833
+ ```bash
834
+ # Summaries (same as running cdc_tasks inside the shell)
835
+ mo-diag cdc show
836
+
837
+ # Inspect a task with watermark threshold control
838
+ mo-diag cdc show nightly_sync --details --threshold=5m
839
+
840
+ # Launch the guided creator with a forced mode
841
+ mo-diag cdc create --table-level
842
+
843
+ # Drop a task with explicit confirmation override
844
+ mo-diag cdc drop nightly_sync --force
845
+ ```
846
+
796
847
  ### Available Commands
797
848
 
798
849
  #### Index Management
@@ -38,6 +38,7 @@ A comprehensive, high-level Python SDK for MatrixOne that provides SQLAlchemy-li
38
38
  - Export to local files or external stages (``stage://`` protocol)
39
39
  - Support for raw SQL, SQLAlchemy, and MatrixOne queries
40
40
  - Transaction-aware exports for consistency
41
+ - 🔁 **Change Data Capture (CDC)**: Create, monitor, and control CDC tasks directly from Python (`client.cdc`, `async_client.cdc`)
41
42
  - 📸 **Snapshot Management**: Create and manage database snapshots at multiple levels
42
43
  - ⏰ **Point-in-Time Recovery**: PITR functionality for precise data recovery
43
44
  - 🔄 **Table Cloning**: Clone databases and tables efficiently
@@ -103,6 +104,37 @@ conda activate matrixone
103
104
  pip install matrixone-python-sdk
104
105
  ```
105
106
 
107
+ ## ⚠️ Important: Column Naming Convention
108
+
109
+ **🚨 CRITICAL: Always use lowercase with underscores (snake_case) for column names!**
110
+
111
+ MatrixOne does not support SQL standard double-quoted identifiers in queries, which causes issues with camelCase column names when using SQLAlchemy ORM.
112
+
113
+ ```python
114
+ # ❌ DON'T: camelCase column names (will fail in SELECT queries)
115
+ class User(Base):
116
+ userName = Column(String(50)) # CREATE succeeds, SELECT fails!
117
+ userId = Column(Integer) # Will cause SQL syntax errors
118
+
119
+ # ✅ DO: Use lowercase with underscores (snake_case)
120
+ class User(Base):
121
+ user_name = Column(String(50)) # Works perfectly
122
+ user_id = Column(Integer) # All operations succeed
123
+ ```
124
+
125
+ **Why this matters:**
126
+ - ✅ CREATE TABLE works with both styles (uses backticks)
127
+ - ✅ INSERT works with both styles
128
+ - ❌ **SELECT fails with camelCase** (uses double quotes, not supported by MatrixOne)
129
+
130
+ **Example of the problem:**
131
+ ```python
132
+ # camelCase generates: SELECT "userName" FROM user ❌ Fails!
133
+ # snake_case generates: SELECT user_name FROM user ✅ Works!
134
+ ```
135
+
136
+ ---
137
+
106
138
  ## Quick Start
107
139
 
108
140
  ### Basic Usage
@@ -249,6 +281,152 @@ client.export.to_csv_stage('s3_stage', 'backup2.csv', stmt)
249
281
  client.disconnect()
250
282
  ```
251
283
 
284
+ ### CDC Task Management
285
+
286
+ Create and control Change Data Capture (CDC) tasks directly from the Python SDK:
287
+
288
+ ```python
289
+ from matrixone import Client, build_mysql_uri
290
+
291
+ client = Client()
292
+ client.connect(database='test')
293
+
294
+ source_uri = build_mysql_uri('127.0.0.1', 6001, user='admin', password='111', account='acct')
295
+ mysql_sink = build_mysql_uri('192.168.1.100', 3306, user='root', password='111')
296
+
297
+ # Database-level replication (automatically ensures Level='database')
298
+ client.cdc.create_database_task(
299
+ task_name='replicate_sales',
300
+ source_uri=source_uri,
301
+ sink_type='mysql',
302
+ sink_uri=mysql_sink,
303
+ source_database='sales',
304
+ sink_database='sales_archive',
305
+ options={'Frequency': '30m'}
306
+ )
307
+
308
+ # Table-level replication helper (list of tuples/dicts/strings)
309
+ client.cdc.create_table_task(
310
+ task_name='replicate_key_tables',
311
+ source_uri=source_uri,
312
+ sink_type='mysql',
313
+ sink_uri=mysql_sink,
314
+ table_mappings=[('sales', 'orders', 'sales_archive', 'orders_backup'), 'sales.customers:sales_archive.customers']
315
+ )
316
+
317
+ # Pause/resume lifecycle
318
+ client.cdc.pause('replicate_sales')
319
+ client.cdc.resume('replicate_sales')
320
+
321
+ # Inspect status and per-table watermarks
322
+ tasks = client.cdc.list()
323
+ watermarks = client.cdc.list_watermarks('replicate_sales')
324
+
325
+ # Drop task when no longer needed
326
+ client.cdc.drop('replicate_sales')
327
+ client.cdc.drop('replicate_key_tables')
328
+
329
+ client.disconnect()
330
+ ```
331
+
332
+ > Async usage: call the same helpers via `await async_client.cdc.create_database_task(...)` / `create_table_task(...)` or inside `async with client.session()`.
333
+
334
+ ### CDC API Overview
335
+
336
+ The `matrixone.cdc.CDCManager` class provides high-level helpers for all CDC lifecycle operations. A typical workflow looks like this:
337
+
338
+ ```python
339
+ from matrixone import Client
340
+ from matrixone.cdc import CDCManager
341
+
342
+ client = Client()
343
+ client.connect(host='127.0.0.1', port=6001, user='root', password='111', database='test')
344
+
345
+ cdc = CDCManager(client)
346
+
347
+ # Create a table-level task
348
+ task = cdc.create_table_task(
349
+ task_name='orders_sync',
350
+ source_uri='mysql://sys#root:111@127.0.0.1:6001',
351
+ sink_type='matrixone',
352
+ sink_uri='mysql://sys#root:111@127.0.0.1:6001',
353
+ table_mappings=[('sales', 'orders', 'backup', 'orders')],
354
+ options={'Frequency': '1h', 'NoFull': True},
355
+ )
356
+
357
+ # Pause and resume
358
+ cdc.pause(task.task_name)
359
+ cdc.resume(task.task_name)
360
+
361
+ # Inspect metadata
362
+ info = cdc.get(task.task_name)
363
+ print(info.state)
364
+
365
+ # Clean up
366
+ cdc.drop(task.task_name)
367
+ client.disconnect()
368
+ ```
369
+
370
+ Key helper methods:
371
+
372
+ * `create()` – create a CDC task from a raw mapping string.
373
+ * `create_database_task()` – convenience wrapper for database-level replication.
374
+ * `create_table_task()` – convenience wrapper for table-level replication using Python data structures.
375
+ * `pause()`/`resume()`/`restart()` – control CDC task state.
376
+ * `list()`/`get()` – retrieve CDC task metadata as `matrixone.cdc.CDCTaskInfo` objects.
377
+ * `list_watermarks()` – read per-table watermarks as `matrixone.cdc.CDCWatermarkInfo` objects.
378
+ * `list_failing_tasks()` – return tasks whose `err_msg` field is populated.
379
+ * `list_stuck_tasks()` – highlight running tasks that have per-table errors.
380
+ * `list_late_table_watermarks()` – detect tables whose watermarks lag behind expected thresholds (supports custom per-task/per-table overrides).
381
+
382
+ Refer to the `matrixone.cdc.CDCManager` docstrings or `examples/example_31_cdc_operations.py` for a comprehensive lifecycle example.
383
+
384
+ ```python
385
+ from matrixone import Client, build_mysql_uri
386
+
387
+ client = Client()
388
+ client.connect(database='test')
389
+
390
+ source_uri = build_mysql_uri('127.0.0.1', 6001, user='admin', password='111', account='acct')
391
+ mysql_sink = build_mysql_uri('192.168.1.100', 3306, user='root', password='111')
392
+
393
+ # Database-level replication (automatically ensures Level='database')
394
+ client.cdc.create_database_task(
395
+ task_name='replicate_sales',
396
+ source_uri=source_uri,
397
+ sink_type='mysql',
398
+ sink_uri=mysql_sink,
399
+ source_database='sales',
400
+ sink_database='sales_archive',
401
+ options={'Frequency': '30m'}
402
+ )
403
+
404
+ # Table-level replication helper (list of tuples/dicts/strings)
405
+ client.cdc.create_table_task(
406
+ task_name='replicate_key_tables',
407
+ source_uri=source_uri,
408
+ sink_type='mysql',
409
+ sink_uri=mysql_sink,
410
+ table_mappings=[('sales', 'orders', 'sales_archive', 'orders_backup'), 'sales.customers:sales_archive.customers']
411
+ )
412
+
413
+ # Pause/resume lifecycle
414
+ client.cdc.pause('replicate_sales')
415
+ client.cdc.resume('replicate_sales')
416
+
417
+ # Inspect status and per-table watermarks
418
+ tasks = client.cdc.list()
419
+ watermarks = client.cdc.list_watermarks('replicate_sales')
420
+
421
+ # Drop task when no longer needed
422
+ client.cdc.drop('replicate_sales')
423
+ client.cdc.drop('replicate_key_tables')
424
+
425
+ client.disconnect()
426
+ ```
427
+
428
+ > Async usage: call the same helpers via `await async_client.cdc.create_database_task(...)` / `create_table_task(...)` or inside `async with client.session()`.
429
+
252
430
  ### Wrapping Existing Sessions (For Legacy Code)
253
431
 
254
432
  If you have existing SQLAlchemy code, wrap your sessions to add MatrixOne features:
@@ -486,6 +664,26 @@ mo-diag -d test -c "sql SELECT COUNT(*) FROM my_table"
486
664
  mo-diag -d test -c "flush_table my_table"
487
665
  ```
488
666
 
667
+ #### CDC Task Shortcuts
668
+
669
+ CDC management is integrated with the CLI. The interactive commands (`cdc_tasks`,
670
+ `cdc_task`, `cdc_create`, `cdc_drop`) now have non-interactive equivalents
671
+ under `mo-diag cdc`, making automation straightforward.
672
+
673
+ ```bash
674
+ # Summaries (mirrors cdc_tasks)
675
+ mo-diag cdc show
676
+
677
+ # Inspect a task and tighten watermark thresholds
678
+ mo-diag cdc show nightly_sync --details --threshold=5m
679
+
680
+ # Start the guided creator in table mode
681
+ mo-diag cdc create --table-level
682
+
683
+ # Drop a task without interactive prompts
684
+ mo-diag cdc drop nightly_sync --force
685
+ ```
686
+
489
687
  ### Available Commands
490
688
 
491
689
  #### Index Management
@@ -36,6 +36,7 @@ from matrixone import Client
36
36
  from matrixone.orm import declarative_base
37
37
  from matrixone.config import get_connection_params
38
38
  from sqlalchemy import Column, Integer, String, DECIMAL, TIMESTAMP, func
39
+ from sqlalchemy.dialects.mysql import JSON
39
40
 
40
41
  # Create MatrixOne logger
41
42
  logger = logging.getLogger(__name__)
@@ -59,6 +60,7 @@ class User(Base):
59
60
  login_count = Column(Integer, default=0)
60
61
  last_login = Column(TIMESTAMP)
61
62
  created_at = Column(TIMESTAMP, server_default=func.current_timestamp())
63
+ preferences = Column(JSON, nullable=True) # JSON field for user preferences
62
64
 
63
65
 
64
66
  class Product(Base):
@@ -87,7 +89,8 @@ def demo_simple_updates():
87
89
  # Create tables
88
90
  client.create_all(Base)
89
91
 
90
- # Insert sample data
92
+ # Insert sample data with JSON fields
93
+ # Note: Python dict in JSON column is automatically serialized
91
94
  users_data = [
92
95
  {
93
96
  "id": 1,
@@ -98,6 +101,7 @@ def demo_simple_updates():
98
101
  "salary": 50000.00,
99
102
  "status": "active",
100
103
  "login_count": 5,
104
+ "preferences": {"theme": "dark", "language": "en", "notifications": True},
101
105
  },
102
106
  {
103
107
  "id": 2,
@@ -108,6 +112,7 @@ def demo_simple_updates():
108
112
  "salary": 60000.00,
109
113
  "status": "active",
110
114
  "login_count": 10,
115
+ "preferences": {"theme": "light", "language": "zh", "notifications": False},
111
116
  },
112
117
  {
113
118
  "id": 3,
@@ -118,18 +123,22 @@ def demo_simple_updates():
118
123
  "salary": 70000.00,
119
124
  "status": "inactive",
120
125
  "login_count": 2,
126
+ "preferences": {"theme": "auto", "language": "en", "email_digest": "daily"},
121
127
  },
122
128
  ]
129
+ # batch_insert() accepts JSON dicts directly; they are serialized automatically
123
130
  client.batch_insert("example_users_update", users_data)
131
+ print(" ✓ Inserted users with JSON preferences using batch_insert()")
124
132
 
125
133
  # 1. Simple update with key-value pairs
126
- print("1. Simple update with key-value pairs")
134
+ print("\n1. Simple update with key-value pairs")
127
135
  query = client.query(User)
128
136
  result = query.update(full_name="Alice Updated", email="alice.updated@example.com").filter(User.id == 1).execute()
129
137
 
130
138
  # Verify the update
131
139
  updated_user = client.query(User).filter(User.id == 1).first()
132
140
  print(f" Updated user: {updated_user.full_name}, {updated_user.email}")
141
+ print(f" User preferences (JSON): {updated_user.preferences}")
133
142
 
134
143
  # 2. Update multiple records with condition
135
144
  print("\n2. Update multiple records with condition")
@@ -165,7 +174,7 @@ def demo_sqlalchemy_expressions():
165
174
  # Create tables
166
175
  client.create_all(Base)
167
176
 
168
- # Insert sample data
177
+ # Insert sample data with a JSON field using batch_insert()
169
178
  users_data = [
170
179
  {
171
180
  "id": 1,
@@ -176,7 +185,14 @@ def demo_sqlalchemy_expressions():
176
185
  "salary": 50000.00,
177
186
  "status": "active",
178
187
  "login_count": 5,
188
+ "preferences": {"theme": "dark", "language": "en"},
179
189
  },
190
+ ]
191
+ client.batch_insert("example_users_update", users_data)
192
+
193
+ # Insert another user using client.insert() with JSON dict
194
+ client.insert(
195
+ "example_users_update",
180
196
  {
181
197
  "id": 2,
182
198
  "username": "bob",
@@ -186,12 +202,13 @@ def demo_sqlalchemy_expressions():
186
202
  "salary": 60000.00,
187
203
  "status": "active",
188
204
  "login_count": 10,
205
+ "preferences": {"theme": "light", "language": "zh", "notifications": False},
189
206
  },
190
- ]
191
- client.batch_insert("example_users_update", users_data)
207
+ )
208
+ print(" Inserted users using batch_insert() and insert() with JSON dicts")
192
209
 
193
210
  # 1. Update with SQLAlchemy expressions
194
- print("1. Update with SQLAlchemy expressions")
211
+ print("\n1. Update with SQLAlchemy expressions")
195
212
  query = client.query(User)
196
213
  result = query.update(login_count=User.login_count + 1, salary=User.salary * 1.1).filter(User.id == 1).execute()
197
214
 
@@ -225,7 +225,7 @@ class StageOperationsDemo:
225
225
  self.results['files_created'].append(csv_file)
226
226
 
227
227
  # Load data from stage using client.load_data
228
- result = client.load_data.from_stage_csv('demo_file_stage', 'stage_users.csv', StageUser)
228
+ result = client.load_data.read_csv_stage('demo_file_stage', 'stage_users.csv', StageUser)
229
229
  self.logger.info(f"✅ Loaded {result.affected_rows} rows from stage")
230
230
 
231
231
  # Verify data
@@ -392,7 +392,7 @@ class StageOperationsDemo:
392
392
  # Load data from stage within session
393
393
  with client.session() as tx:
394
394
  # Load data from pre-existing stage in session
395
- result = tx.load_data.from_stage_csv('tx_temp_stage', 'tx_data.csv', TxStageData)
395
+ result = tx.load_data.read_csv_stage('tx_temp_stage', 'tx_data.csv', TxStageData)
396
396
  # Check for both ResultSet and SQLAlchemy Result
397
397
  affected = result.affected_rows if hasattr(result, 'affected_rows') else result.rowcount
398
398
  self.logger.info(f"✅ Loaded {affected} rows in session")
@@ -706,7 +706,7 @@ class StageOperationsDemo:
706
706
 
707
707
  # Load customers
708
708
  print("Loading customers dimension...")
709
- stage.load_csv('customers.csv', Customer, ignore_lines=1)
709
+ stage.load_csv('customers.csv', Customer, skiprows=1)
710
710
  count = client.query(Customer).count()
711
711
  print(f"✅ Loaded {count} customers")
712
712
 
@@ -718,13 +718,13 @@ class StageOperationsDemo:
718
718
 
719
719
  # Load orders
720
720
  print("Loading orders fact table...")
721
- stage.load_csv('orders.csv', Order, ignore_lines=1)
721
+ stage.load_csv('orders.csv', Order, skiprows=1)
722
722
  count = client.query(Order).count()
723
723
  print(f"✅ Loaded {count} orders")
724
724
 
725
725
  # Load shipping (TSV)
726
726
  print("Loading shipping logistics...")
727
- stage.load_tsv('shipping.tsv', Shipping, ignore_lines=1)
727
+ stage.load_tsv('shipping.tsv', Shipping, skiprows=1)
728
728
  count = client.query(Shipping).count()
729
729
  print(f"✅ Loaded {count} shipping records")
730
730
 
@@ -794,7 +794,7 @@ class StageOperationsDemo:
794
794
  self.results['files_created'].append(delta_file)
795
795
 
796
796
  print("📥 Loading incremental orders...")
797
- stage.load_csv('orders_delta.csv', Order, ignore_lines=1)
797
+ stage.load_csv('orders_delta.csv', Order, skiprows=1)
798
798
  count = client.query(Order).count()
799
799
  print(f"✅ Total orders now: {count} (+2 new)")
800
800