@robosystems/client 0.2.14 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/sdk/sdk.gen.js CHANGED
@@ -2,8 +2,8 @@
2
2
  // This file is auto-generated by @hey-api/openapi-ts
3
3
  Object.defineProperty(exports, "__esModule", { value: true });
4
4
  exports.batchProcessQueries = exports.executeSpecificAgent = exports.getAgentMetadata = exports.autoSelectAgent = exports.listAgents = exports.syncConnection = exports.getConnection = exports.deleteConnection = exports.oauthCallback = exports.initOAuth = exports.createLinkToken = exports.exchangeLinkToken = exports.getConnectionOptions = exports.createConnection = exports.listConnections = exports.getOrgUsage = exports.getOrgLimits = exports.updateOrgMemberRole = exports.removeOrgMember = exports.inviteOrgMember = exports.listOrgMembers = exports.listOrgGraphs = exports.updateOrg = exports.getOrg = exports.createOrg = exports.listUserOrgs = exports.updateUserApiKey = exports.revokeUserApiKey = exports.createUserApiKey = exports.listUserApiKeys = exports.updateUserPassword = exports.updateUser = exports.getCurrentUser = exports.getServiceStatus = exports.getCaptchaConfig = exports.completeSsoAuth = exports.ssoTokenExchange = exports.generateSsoToken = exports.resetPassword = exports.validateResetToken = exports.forgotPassword = exports.checkPasswordStrength = exports.getPasswordPolicy = exports.verifyEmail = exports.resendVerificationEmail = exports.refreshAuthSession = exports.getCurrentAuthUser = exports.logoutUser = exports.loginUser = exports.registerUser = void 0;
5
- exports.listOrgSubscriptions = exports.createPortalSession = exports.getOrgBillingCustomer = exports.cancelOperation = exports.getOperationStatus = exports.streamOperationEvents = exports.getServiceOfferings = exports.selectGraph = exports.getAvailableGraphTiers = exports.getAvailableExtensions = exports.createGraph = exports.getGraphs = exports.queryTables = exports.ingestTables = exports.updateFileStatus = exports.getFileInfo = exports.deleteFile = exports.getUploadUrl = exports.listTableFiles = exports.listTables = exports.upgradeSubscription = exports.createRepositorySubscription = exports.getGraphSubscription = exports.getSubgraphQuota = exports.getSubgraphInfo = exports.deleteSubgraph = exports.createSubgraph = exports.listSubgraphs = exports.getGraphLimits = exports.getDatabaseInfo = exports.getDatabaseHealth = exports.checkStorageLimits = exports.getStorageUsage = exports.checkCreditBalance = exports.listCreditTransactions = exports.getCreditSummary = exports.validateSchema = exports.exportGraphSchema = exports.getGraphSchema = exports.executeCypherQuery = exports.getGraphUsageAnalytics = exports.getGraphMetrics = exports.getBackupStats = exports.restoreBackup = exports.getBackupDownloadUrl = exports.createBackup = exports.listBackups = exports.callMcpTool = exports.listMcpTools = exports.recommendAgent = void 0;
6
- exports.getCheckoutStatus = exports.createCheckoutSession = exports.getOrgUpcomingInvoice = exports.listOrgInvoices = exports.cancelOrgSubscription = exports.getOrgSubscription = void 0;
5
+ exports.cancelOperation = exports.getOperationStatus = exports.streamOperationEvents = exports.getServiceOfferings = exports.selectGraph = exports.getAvailableGraphTiers = exports.getAvailableExtensions = exports.createGraph = exports.getGraphs = exports.updateFile = exports.getFile = exports.deleteFile = exports.createFileUpload = exports.listFiles = exports.materializeGraph = exports.getMaterializationStatus = exports.saveView = exports.createView = exports.queryTables = exports.listTables = exports.upgradeSubscription = exports.createRepositorySubscription = exports.getGraphSubscription = exports.getSubgraphQuota = exports.getSubgraphInfo = exports.deleteSubgraph = exports.createSubgraph = exports.listSubgraphs = exports.getGraphLimits = exports.getDatabaseInfo = exports.getDatabaseHealth = exports.checkStorageLimits = exports.getStorageUsage = exports.checkCreditBalance = exports.listCreditTransactions = exports.getCreditSummary = exports.validateSchema = exports.exportGraphSchema = exports.getGraphSchema = exports.executeCypherQuery = exports.getGraphUsageAnalytics = exports.getGraphMetrics = exports.getBackupStats = exports.restoreBackup = exports.getBackupDownloadUrl = exports.createBackup = exports.listBackups = exports.callMcpTool = exports.listMcpTools = exports.recommendAgent = void 0;
6
+ exports.getCheckoutStatus = exports.createCheckoutSession = exports.getOrgUpcomingInvoice = exports.listOrgInvoices = exports.cancelOrgSubscription = exports.getOrgSubscription = exports.listOrgSubscriptions = exports.createPortalSession = exports.getOrgBillingCustomer = void 0;
7
7
  const client_gen_1 = require("./client.gen");
8
8
  /**
9
9
  * Register New User
@@ -1712,11 +1712,14 @@ const getGraphUsageAnalytics = (options) => {
1712
1712
  };
1713
1713
  exports.getGraphUsageAnalytics = getGraphUsageAnalytics;
1714
1714
  /**
1715
- * Execute Cypher Query (Read-Only)
1716
- * Execute a read-only Cypher query with intelligent response optimization.
1715
+ * Execute Cypher Query
1716
+ * Execute a Cypher query with intelligent response optimization.
1717
1717
  *
1718
- * **IMPORTANT: This endpoint is READ-ONLY.** Write operations (CREATE, MERGE, SET, DELETE) are not allowed.
1719
- * To load data into your graph, use the staging pipeline:
1718
+ * **IMPORTANT: Write operations depend on graph type:**
1719
+ * - **Main Graphs**: READ-ONLY. Write operations (CREATE, MERGE, SET, DELETE) are not allowed.
1720
+ * - **Subgraphs**: WRITE-ENABLED. Full Cypher write operations are supported for development and report creation.
1721
+ *
1722
+ * To load data into main graphs, use the staging pipeline:
1720
1723
  * 1. Create file upload: `POST /v1/graphs/{graph_id}/tables/{table_name}/files`
1721
1724
  * 2. Ingest to graph: `POST /v1/graphs/{graph_id}/tables/ingest`
1722
1725
  *
@@ -2293,7 +2296,7 @@ const listSubgraphs = (options) => {
2293
2296
  exports.listSubgraphs = listSubgraphs;
2294
2297
  /**
2295
2298
  * Create Subgraph
2296
- * Create a new subgraph within a parent graph.
2299
+ * Create a new subgraph within a parent graph, with optional data forking.
2297
2300
  *
2298
2301
  * **Requirements:**
2299
2302
  * - Valid authentication
@@ -2303,9 +2306,18 @@ exports.listSubgraphs = listSubgraphs;
2303
2306
  * - Must be within subgraph quota limits
2304
2307
  * - Subgraph name must be unique within the parent graph
2305
2308
  *
2309
+ * **Fork Mode:**
2310
+ * When `fork_parent=true`, the operation:
2311
+ * - Returns immediately with an operation_id for SSE monitoring
2312
+ * - Copies data from parent graph to the new subgraph
2313
+ * - Supports selective forking via metadata.fork_options
2314
+ * - Tracks progress in real-time via SSE
2315
+ *
2306
2316
  * **Returns:**
2307
- * - Created subgraph details including its unique ID
2308
- * - Subgraph ID format: `{parent_id}_{subgraph_name}` (e.g., kg1234567890abcdef_dev)
2317
+ * - Without fork: Immediate SubgraphResponse with created subgraph details
2318
+ * - With fork: Operation response with SSE monitoring endpoint
2319
+ *
2320
+ * **Subgraph ID format:** `{parent_id}_{subgraph_name}` (e.g., kg1234567890abcdef_dev)
2309
2321
  *
2310
2322
  * **Usage:**
2311
2323
  * - Subgraphs share parent's credit pool
@@ -2601,44 +2613,66 @@ const listTables = (options) => {
2601
2613
  };
2602
2614
  exports.listTables = listTables;
2603
2615
  /**
2604
- * List Files in Staging Table
2605
- * List all files uploaded to a staging table with comprehensive metadata.
2616
+ * Query Staging Tables with SQL
2617
+ * Execute SQL queries on DuckDB staging tables for data inspection and validation.
2618
+ *
2619
+ * Query raw staging data directly with SQL before ingestion into the graph database.
2620
+ * Useful for data quality checks, validation, and exploratory analysis.
2621
+ *
2622
+ * **Security Best Practice - Use Parameterized Queries:**
2623
+ * ALWAYS use query parameters instead of string concatenation to prevent SQL injection:
2624
+ * - ✅ SAFE: `SELECT * FROM Entity WHERE type = ? LIMIT ?` with `parameters: ["Company", 100]`
2625
+ * - ❌ UNSAFE: `SELECT * FROM Entity WHERE type = 'Company' LIMIT 100` with user input concatenated into SQL string
2606
2626
  *
2607
- * Get a complete inventory of all files in a staging table, including upload status,
2608
- * file sizes, row counts, and S3 locations. Essential for monitoring upload progress
2609
- * and validating data before ingestion.
2627
+ * Query parameters provide automatic escaping and type safety. Use `?` placeholders with parameters array.
2610
2628
  *
2611
2629
  * **Use Cases:**
2612
- * - Monitor file upload progress
2613
- * - Verify files are ready for ingestion
2614
- * - Check file formats and sizes
2615
- * - Track storage usage per table
2616
- * - Identify failed or incomplete uploads
2617
- * - Pre-ingestion validation
2630
+ * - Validate data quality before graph ingestion
2631
+ * - Inspect row-level data for debugging
2632
+ * - Run analytics on staging tables
2633
+ * - Check for duplicates, nulls, or data issues
2634
+ * - Preview data transformations
2618
2635
  *
2619
- * **Returned Metadata:**
2620
- * - File ID, name, and format (parquet, csv, json)
2621
- * - Size in bytes and row count (if available)
2622
- * - Upload status and method
2623
- * - Creation and upload timestamps
2624
- * - S3 key for reference
2636
+ * **Workflow:**
2637
+ * 1. Upload data files via `POST /tables/{table_name}/files`
2638
+ * 2. Query staging tables to validate: `POST /tables/query`
2639
+ * 3. Fix any data issues by re-uploading
2640
+ * 4. Ingest validated data: `POST /tables/ingest`
2625
2641
  *
2626
- * **Upload Status Values:**
2627
- * - `pending`: Upload URL generated, awaiting upload
2628
- * - `uploaded`: Successfully uploaded, ready for ingestion
2629
- * - `disabled`: Excluded from ingestion
2630
- * - `archived`: Soft deleted
2631
- * - `failed`: Upload failed
2642
+ * **Supported SQL:**
2643
+ * - Full DuckDB SQL syntax
2644
+ * - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
2645
+ * - Aggregations, window functions, CTEs
2646
+ * - Multiple table joins across staging area
2632
2647
  *
2633
- * **Important Notes:**
2634
- * - Only `uploaded` files are ingested
2635
- * - Check `row_count` to estimate data volume
2636
- * - Use `total_size_bytes` for storage monitoring
2637
- * - Files with `failed` status should be deleted and re-uploaded
2638
- * - File listing is included - no credit consumption
2648
+ * **Common Operations:**
2649
+ * - Count rows: `SELECT COUNT(*) FROM Entity`
2650
+ * - Filter by type: `SELECT * FROM Entity WHERE entity_type = ? LIMIT ?` with `parameters: ["Company", 100]`
2651
+ * - Check for nulls: `SELECT * FROM Entity WHERE name IS NULL LIMIT 10`
2652
+ * - Find duplicates: `SELECT identifier, COUNT(*) as cnt FROM Entity GROUP BY identifier HAVING COUNT(*) > 1`
2653
+ * - Filter amounts: `SELECT * FROM Transaction WHERE amount > ? AND date >= ?` with `parameters: [1000, "2024-01-01"]`
2654
+ *
2655
+ * **Limits:**
2656
+ * - Query timeout: 30 seconds
2657
+ * - Result limit: 10,000 rows (use LIMIT clause)
2658
+ * - Read-only: No INSERT, UPDATE, DELETE
2659
+ * - User's tables only: Cannot query other users' data
2660
+ *
2661
+ * **Subgraph Support:**
2662
+ * This endpoint accepts both parent graph IDs and subgraph IDs.
2663
+ * - Parent graph: Use `graph_id` like `kg0123456789abcdef`
2664
+ * - Subgraph: Use full subgraph ID like `kg0123456789abcdef_dev`
2665
+ * Each subgraph has its own independent staging tables.
2666
+ *
2667
+ * **Shared Repositories:**
2668
+ * Shared repositories (SEC, etc.) do not allow direct SQL queries.
2669
+ * Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`
2670
+ *
2671
+ * **Note:**
2672
+ * Staging table queries are included - no credit consumption
2639
2673
  */
2640
- const listTableFiles = (options) => {
2641
- return (options.client ?? client_gen_1.client).get({
2674
+ const queryTables = (options) => {
2675
+ return (options.client ?? client_gen_1.client).post({
2642
2676
  security: [
2643
2677
  {
2644
2678
  name: 'X-API-Key',
@@ -2649,55 +2683,35 @@ const listTableFiles = (options) => {
2649
2683
  type: 'http'
2650
2684
  }
2651
2685
  ],
2652
- url: '/v1/graphs/{graph_id}/tables/{table_name}/files',
2653
- ...options
2686
+ url: '/v1/graphs/{graph_id}/tables/query',
2687
+ ...options,
2688
+ headers: {
2689
+ 'Content-Type': 'application/json',
2690
+ ...options.headers
2691
+ }
2654
2692
  });
2655
2693
  };
2656
- exports.listTableFiles = listTableFiles;
2694
+ exports.queryTables = queryTables;
2657
2695
  /**
2658
- * Get File Upload URL
2659
- * Generate a presigned S3 URL for secure file upload.
2660
- *
2661
- * Initiates file upload to a staging table by generating a secure, time-limited
2662
- * presigned S3 URL. Files are uploaded directly to S3, bypassing the API for
2663
- * optimal performance.
2664
- *
2665
- * **Upload Workflow:**
2666
- * 1. Call this endpoint to get presigned URL
2667
- * 2. PUT file directly to S3 URL
2668
- * 3. Call PATCH /tables/files/{file_id} with status='uploaded'
2669
- * 4. Backend validates file and calculates metrics
2670
- * 5. File ready for ingestion
2671
- *
2672
- * **Supported Formats:**
2673
- * - Parquet (`application/x-parquet` with `.parquet` extension)
2674
- * - CSV (`text/csv` with `.csv` extension)
2675
- * - JSON (`application/json` with `.json` extension)
2676
- *
2677
- * **Validation:**
2678
- * - File extension must match content type
2679
- * - File name 1-255 characters
2680
- * - No path traversal characters (.. / \)
2681
- * - Auto-creates table if it doesn't exist
2696
+ * Create View
2697
+ * Generate financial report view from data source (dual-mode support).
2682
2698
  *
2683
- * **Auto-Table Creation:**
2684
- * Tables are automatically created on first file upload with type inferred from name
2685
- * (e.g., "Transaction" relationship) and empty schema populated during ingestion.
2699
+ * **Mode 1: Transaction Aggregation (generate_from_transactions)**
2700
+ * - Aggregates raw transaction data to trial balance
2701
+ * - Creates facts on-demand
2702
+ * - Shows real-time reporting from source of truth
2686
2703
  *
2687
- * **Subgraph Support:**
2688
- * This endpoint accepts both parent graph IDs and subgraph IDs.
2689
- * - Parent graph: Use `graph_id` like `kg0123456789abcdef`
2690
- * - Subgraph: Use full subgraph ID like `kg0123456789abcdef_dev`
2691
- * Each subgraph has completely isolated S3 staging areas and tables. Files uploaded
2692
- * to one subgraph do not appear in other subgraphs.
2704
+ * **Mode 2: Existing Facts (pivot_existing_facts)**
2705
+ * - Queries existing Fact nodes
2706
+ * - Supports multi-dimensional analysis
2707
+ * - Works with SEC filings and pre-computed facts
2693
2708
  *
2694
- * **Important Notes:**
2695
- * - Presigned URLs expire (default: 1 hour)
2696
- * - Use appropriate Content-Type header when uploading to S3
2697
- * - File extension must match content type
2698
- * - Upload URL generation is included - no credit consumption
2709
+ * Both modes:
2710
+ * - Build FactGrid from data
2711
+ * - Generate pivot table presentation
2712
+ * - Return consistent response format
2699
2713
  */
2700
- const getUploadUrl = (options) => {
2714
+ const createView = (options) => {
2701
2715
  return (options.client ?? client_gen_1.client).post({
2702
2716
  security: [
2703
2717
  {
@@ -2709,7 +2723,7 @@ const getUploadUrl = (options) => {
2709
2723
  type: 'http'
2710
2724
  }
2711
2725
  ],
2712
- url: '/v1/graphs/{graph_id}/tables/{table_name}/files',
2726
+ url: '/v1/graphs/{graph_id}/views',
2713
2727
  ...options,
2714
2728
  headers: {
2715
2729
  'Content-Type': 'application/json',
@@ -2717,43 +2731,41 @@ const getUploadUrl = (options) => {
2717
2731
  }
2718
2732
  });
2719
2733
  };
2720
- exports.getUploadUrl = getUploadUrl;
2734
+ exports.createView = createView;
2721
2735
  /**
2722
- * Delete File from Staging
2723
- * Delete a file from S3 storage and database tracking.
2736
+ * Save View
2737
+ * Save or update view as materialized report in the graph.
2724
2738
  *
2725
- * Remove unwanted, duplicate, or incorrect files from staging tables before ingestion.
2726
- * The file is deleted from both S3 and database tracking, and table statistics
2727
- * are automatically recalculated.
2739
+ * Converts computed view results into persistent Report, Fact, and Structure nodes.
2740
+ * This establishes what data exists in the subgraph, which then defines what
2741
+ * needs to be exported for publishing to the parent graph.
2728
2742
  *
2729
- * **Use Cases:**
2730
- * - Remove duplicate uploads
2731
- * - Delete files with incorrect data
2732
- * - Clean up failed uploads
2733
- * - Fix data quality issues before ingestion
2734
- * - Manage storage usage
2743
+ * **Create Mode** (no report_id provided):
2744
+ * - Generates new report_id from entity + period + report type
2745
+ * - Creates new Report, Facts, and Structures
2735
2746
  *
2736
- * **What Happens:**
2737
- * 1. File deleted from S3 storage
2738
- * 2. Database tracking record removed
2739
- * 3. Table statistics recalculated (file count, size, row count)
2740
- * 4. DuckDB automatically excludes file from future queries
2747
+ * **Update Mode** (report_id provided):
2748
+ * - Deletes all existing Facts and Structures for the report
2749
+ * - Updates Report metadata
2750
+ * - Creates fresh Facts and Structures from current view
2751
+ * - Useful for refreshing reports with updated data or view configurations
2741
2752
  *
2742
- * **Security:**
2743
- * - Write access required (verified via auth)
2744
- * - Shared repositories block file deletions
2745
- * - Full audit trail of deletion operations
2746
- * - Cannot delete after ingestion to graph
2753
+ * **This is NOT publishing** - it only creates nodes in the subgraph workspace.
2754
+ * Publishing (export → parquet → parent ingest) happens separately.
2747
2755
  *
2748
- * **Important Notes:**
2749
- * - Delete files before ingestion for best results
2750
- * - Table statistics update automatically
2751
- * - No need to refresh DuckDB - exclusion is automatic
2752
- * - Consider re-uploading corrected version after deletion
2753
- * - File deletion is included - no credit consumption
2756
+ * Creates/Updates:
2757
+ * - Report node with metadata
2758
+ * - Fact nodes with all aspects (period, entity, element, unit)
2759
+ * - PresentationStructure nodes (how facts are displayed)
2760
+ * - CalculationStructure nodes (how facts roll up)
2761
+ *
2762
+ * Returns:
2763
+ * - report_id: Unique identifier used as parquet export prefix
2764
+ * - parquet_export_prefix: Filename prefix for future exports
2765
+ * - All created facts and structures
2754
2766
  */
2755
- const deleteFile = (options) => {
2756
- return (options.client ?? client_gen_1.client).delete({
2767
+ const saveView = (options) => {
2768
+ return (options.client ?? client_gen_1.client).post({
2757
2769
  security: [
2758
2770
  {
2759
2771
  name: 'X-API-Key',
@@ -2764,30 +2776,43 @@ const deleteFile = (options) => {
2764
2776
  type: 'http'
2765
2777
  }
2766
2778
  ],
2767
- url: '/v1/graphs/{graph_id}/tables/files/{file_id}',
2768
- ...options
2779
+ url: '/v1/graphs/{graph_id}/views/save',
2780
+ ...options,
2781
+ headers: {
2782
+ 'Content-Type': 'application/json',
2783
+ ...options.headers
2784
+ }
2769
2785
  });
2770
2786
  };
2771
- exports.deleteFile = deleteFile;
2787
+ exports.saveView = saveView;
2772
2788
  /**
2773
- * Get File Information
2774
- * Get detailed information about a specific file.
2789
+ * Get Materialization Status
2790
+ * Get current materialization status for the graph.
2775
2791
  *
2776
- * Retrieve comprehensive metadata for a single file, including upload status,
2777
- * size, row count, and timestamps. Useful for validating individual files
2778
- * before ingestion.
2792
+ * Shows whether the graph is stale (DuckDB has changes not yet in graph database),
2793
+ * when it was last materialized, and how long since last materialization.
2794
+ *
2795
+ * **Status Information:**
2796
+ * - Whether graph is currently stale
2797
+ * - Reason for staleness if applicable
2798
+ * - When graph became stale
2799
+ * - When graph was last materialized
2800
+ * - Total materialization count
2801
+ * - Hours since last materialization
2779
2802
  *
2780
2803
  * **Use Cases:**
2781
- * - Validate file upload completion
2782
- * - Check file metadata before ingestion
2783
- * - Debug upload issues
2784
- * - Verify file format and size
2785
- * - Track file lifecycle
2804
+ * - Decide if materialization is needed
2805
+ * - Monitor graph freshness
2806
+ * - Track materialization history
2807
+ * - Understand data pipeline state
2786
2808
  *
2787
- * **Note:**
2788
- * File info retrieval is included - no credit consumption
2809
+ * **Important Notes:**
2810
+ * - Stale graph means DuckDB has changes not in graph
2811
+ * - Graph becomes stale after file deletions
2812
+ * - Materialization clears staleness
2813
+ * - Status retrieval is included - no credit consumption
2789
2814
  */
2790
- const getFileInfo = (options) => {
2815
+ const getMaterializationStatus = (options) => {
2791
2816
  return (options.client ?? client_gen_1.client).get({
2792
2817
  security: [
2793
2818
  {
@@ -2799,52 +2824,67 @@ const getFileInfo = (options) => {
2799
2824
  type: 'http'
2800
2825
  }
2801
2826
  ],
2802
- url: '/v1/graphs/{graph_id}/tables/files/{file_id}',
2827
+ url: '/v1/graphs/{graph_id}/materialize/status',
2803
2828
  ...options
2804
2829
  });
2805
2830
  };
2806
- exports.getFileInfo = getFileInfo;
2831
+ exports.getMaterializationStatus = getMaterializationStatus;
2807
2832
  /**
2808
- * Update File Upload Status
2809
- * Update file status after upload completes.
2833
+ * Materialize Graph from DuckDB
2834
+ * Rebuild entire graph from DuckDB staging tables (materialized view pattern).
2810
2835
  *
2811
- * Marks files as uploaded after successful S3 upload. The backend validates
2812
- * the file, calculates size and row count, enforces storage limits, and
2813
- * registers the DuckDB table for queries.
2836
+ * This endpoint rebuilds the complete graph database from the current state of DuckDB
2837
+ * staging tables. It automatically discovers all tables, ingests them in the correct
2838
+ * order (nodes before relationships), and clears the staleness flag.
2814
2839
  *
2815
- * **Status Values:**
2816
- * - `uploaded`: File successfully uploaded to S3 (triggers validation)
2817
- * - `disabled`: Exclude file from ingestion
2818
- * - `archived`: Soft delete file
2819
- *
2820
- * **What Happens on 'uploaded' Status:**
2821
- * 1. Verify file exists in S3
2822
- * 2. Calculate actual file size
2823
- * 3. Enforce tier storage limits
2824
- * 4. Calculate or estimate row count
2825
- * 5. Update table statistics
2826
- * 6. Register DuckDB external table
2827
- * 7. File ready for ingestion
2828
- *
2829
- * **Row Count Calculation:**
2830
- * - **Parquet**: Exact count from file metadata
2831
- * - **CSV**: Count rows (minus header)
2832
- * - **JSON**: Count array elements
2833
- * - **Fallback**: Estimate from file size if reading fails
2834
- *
2835
- * **Storage Limits:**
2836
- * Enforced per subscription tier. Returns HTTP 413 if limit exceeded.
2837
- * Check current usage before large uploads.
2840
+ * **When to Use:**
2841
+ * - After batch uploads (files uploaded with ingest_to_graph=false)
2842
+ * - After cascade file deletions (graph marked stale)
2843
+ * - To ensure graph consistency with DuckDB state
2844
+ * - Periodic full refresh
2838
2845
  *
2839
- * **Important Notes:**
2840
- * - Always call this after S3 upload completes
2841
- * - Check response for actual row count
2842
- * - Storage limit errors (413) mean tier upgrade needed
2843
- * - DuckDB registration failures are non-fatal (retried later)
2844
- * - Status updates are included - no credit consumption
2846
+ * **What Happens:**
2847
+ * 1. Discovers all tables for the graph from PostgreSQL registry
2848
+ * 2. Sorts tables (nodes before relationships)
2849
+ * 3. Ingests all tables from DuckDB to graph in order
2850
+ * 4. Clears staleness flag on success
2851
+ * 5. Returns detailed materialization report
2852
+ *
2853
+ * **Staleness Check:**
2854
+ * By default, only materializes if graph is stale (after deletions or missed ingestions).
2855
+ * Use `force=true` to rebuild regardless of staleness.
2856
+ *
2857
+ * **Rebuild Feature:**
2858
+ * Setting `rebuild=true` regenerates the entire graph database from scratch:
2859
+ * - Deletes existing graph database
2860
+ * - Recreates with fresh schema from active GraphSchema
2861
+ * - Ingests all data files
2862
+ * - Safe operation - DuckDB is source of truth
2863
+ * - Useful for schema changes or data corrections
2864
+ * - Graph marked as 'rebuilding' during process
2865
+ *
2866
+ * **Table Ordering:**
2867
+ * Node tables (PascalCase) are ingested before relationship tables (UPPERCASE) to
2868
+ * ensure referential integrity.
2869
+ *
2870
+ * **Error Handling:**
2871
+ * With `ignore_errors=true` (default), continues materializing even if individual
2872
+ * rows fail. Failed rows are logged but don't stop the process.
2873
+ *
2874
+ * **Concurrency Control:**
2875
+ * Only one materialization can run per graph at a time. If another materialization is in progress,
2876
+ * you'll receive a 409 Conflict error. The distributed lock automatically expires after
2877
+ * the configured TTL (default: 1 hour) to prevent deadlocks from failed materializations.
2878
+ *
2879
+ * **Performance:**
2880
+ * Full graph materialization can take minutes for large datasets. Consider running
2881
+ * during off-peak hours for production systems.
2882
+ *
2883
+ * **Credits:**
2884
+ * Materialization is included - no credit consumption
2845
2885
  */
2846
- const updateFileStatus = (options) => {
2847
- return (options.client ?? client_gen_1.client).patch({
2886
+ const materializeGraph = (options) => {
2887
+ return (options.client ?? client_gen_1.client).post({
2848
2888
  security: [
2849
2889
  {
2850
2890
  name: 'X-API-Key',
@@ -2855,7 +2895,7 @@ const updateFileStatus = (options) => {
2855
2895
  type: 'http'
2856
2896
  }
2857
2897
  ],
2858
- url: '/v1/graphs/{graph_id}/tables/files/{file_id}',
2898
+ url: '/v1/graphs/{graph_id}/materialize',
2859
2899
  ...options,
2860
2900
  headers: {
2861
2901
  'Content-Type': 'application/json',
@@ -2863,73 +2903,89 @@ const updateFileStatus = (options) => {
2863
2903
  }
2864
2904
  });
2865
2905
  };
2866
- exports.updateFileStatus = updateFileStatus;
2906
+ exports.materializeGraph = materializeGraph;
2867
2907
  /**
2868
- * Ingest Tables to Graph
2869
- * Load all files from S3 into DuckDB staging tables and ingest into Kuzu graph database.
2908
+ * List Files in Graph
2909
+ * List all files in the graph with optional filtering.
2870
2910
  *
2871
- * Orchestrates the complete data pipeline from S3 staging files into the Kuzu graph database.
2872
- * Processes all tables in a single bulk operation with comprehensive error handling and metrics.
2911
+ * Get a complete inventory of files across all tables or filtered by table name,
2912
+ * status, or other criteria. Files are first-class resources with independent lifecycle.
2913
+ *
2914
+ * **Query Parameters:**
2915
+ * - `table_name` (optional): Filter by table name
2916
+ * - `status` (optional): Filter by upload status (uploaded, pending, failed, etc.)
2873
2917
  *
2874
2918
  * **Use Cases:**
2875
- * - Initial graph population from uploaded data
2876
- * - Incremental data updates with new files
2877
- * - Complete database rebuild from source files
2878
- * - Recovery from failed ingestion attempts
2919
+ * - Monitor file upload progress across all tables
2920
+ * - Verify files are ready for ingestion
2921
+ * - Check file metadata and sizes
2922
+ * - Track storage usage per graph
2923
+ * - Identify failed or incomplete uploads
2924
+ * - Audit file provenance
2879
2925
  *
2880
- * **Workflow:**
2881
- * 1. Upload data files via `POST /tables/{table_name}/files`
2882
- * 2. Files are validated and marked as 'uploaded'
2883
- * 3. Trigger ingestion: `POST /tables/ingest`
2884
- * 4. DuckDB staging tables created from S3 patterns
2885
- * 5. Data copied from DuckDB to Kuzu
2886
- * 6. Per-table results and metrics returned
2926
+ * **Returned Metadata:**
2927
+ * - File ID, name, and format (parquet, csv, json)
2928
+ * - Size in bytes and row count (if available)
2929
+ * - Upload status and timestamps
2930
+ * - DuckDB and graph ingestion status
2931
+ * - Table association
2887
2932
  *
2888
- * **Rebuild Feature:**
2889
- * Setting `rebuild=true` regenerates the entire graph database from scratch:
2890
- * - Deletes existing Kuzu database
2891
- * - Recreates with fresh schema from active GraphSchema
2892
- * - Ingests all data files
2893
- * - Safe operation - S3 is source of truth
2894
- * - Useful for schema changes or data corrections
2895
- * - Graph marked as 'rebuilding' during process
2933
+ * **File Lifecycle Tracking:**
2934
+ * Multi-layer status across S3 → DuckDB → Graph pipeline
2896
2935
  *
2897
- * **Error Handling:**
2898
- * - Per-table error isolation with `ignore_errors` flag
2899
- * - Partial success support (some tables succeed, some fail)
2900
- * - Detailed error reporting per table
2901
- * - Graph status tracking throughout process
2902
- * - Automatic failure recovery and cleanup
2936
+ * **Important Notes:**
2937
+ * - Files are graph-scoped, not table-scoped
2938
+ * - Use table_name parameter to filter by table
2939
+ * - File listing is included - no credit consumption
2940
+ */
2941
+ const listFiles = (options) => {
2942
+ return (options.client ?? client_gen_1.client).get({
2943
+ security: [
2944
+ {
2945
+ name: 'X-API-Key',
2946
+ type: 'apiKey'
2947
+ },
2948
+ {
2949
+ scheme: 'bearer',
2950
+ type: 'http'
2951
+ }
2952
+ ],
2953
+ url: '/v1/graphs/{graph_id}/files',
2954
+ ...options
2955
+ });
2956
+ };
2957
+ exports.listFiles = listFiles;
2958
+ /**
2959
+ * Create File Upload
2960
+ * Generate presigned S3 URL for file upload.
2903
2961
  *
2904
- * **Performance:**
2905
- * - Processes all tables in sequence
2906
- * - Each table timed independently
2907
- * - Total execution metrics provided
2908
- * - Scales to thousands of files
2909
- * - Optimized for large datasets
2962
+ * Initiate file upload by generating a secure, time-limited presigned S3 URL.
2963
+ * Files are first-class resources uploaded directly to S3.
2910
2964
  *
2911
- * **Concurrency Control:**
2912
- * Only one ingestion can run per graph at a time. If another ingestion is in progress,
2913
- * you'll receive a 409 Conflict error. The distributed lock automatically expires after
2914
- * the configured TTL (default: 1 hour) to prevent deadlocks from failed ingestions.
2965
+ * **Request Body:**
2966
+ * - `file_name`: Name of the file (1-255 characters)
2967
+ * - `file_format`: Format (parquet, csv, json)
2968
+ * - `table_name`: Table to associate file with
2915
2969
  *
2916
- * **Subgraph Support:**
2917
- * This endpoint accepts both parent graph IDs and subgraph IDs.
2918
- * - Parent graph: Use `graph_id` like `kg0123456789abcdef`
2919
- * - Subgraph: Use full subgraph ID like `kg0123456789abcdef_dev`
2920
- * Each subgraph has independent staging tables and graph data. Ingestion operates
2921
- * on the specified graph/subgraph only and does not affect other subgraphs.
2970
+ * **Upload Workflow:**
2971
+ * 1. Call this endpoint to get presigned URL
2972
+ * 2. PUT file directly to S3 URL
2973
+ * 3. Call PATCH /files/{file_id} with status='uploaded'
2974
+ * 4. Backend validates and stages in DuckDB immediately
2975
+ * 5. Background task ingests to graph
2976
+ *
2977
+ * **Supported Formats:**
2978
+ * - Parquet, CSV, JSON
2979
+ *
2980
+ * **Auto-Table Creation:**
2981
+ * Tables are automatically created if they don't exist.
2922
2982
  *
2923
2983
  * **Important Notes:**
2924
- * - Only files with 'uploaded' status are processed
2925
- * - Tables with no uploaded files are skipped
2926
- * - Use `ignore_errors=false` for strict validation
2927
- * - Monitor progress via per-table results
2928
- * - Check graph metadata for rebuild status
2929
- * - Wait for current ingestion to complete before starting another
2930
- * - Table ingestion is included - no credit consumption
2931
- */
2932
- const ingestTables = (options) => {
2984
+ * - Presigned URLs expire (default: 1 hour)
2985
+ * - Files are graph-scoped, independent resources
2986
+ * - Upload URL generation is included - no credit consumption
2987
+ */
2988
+ const createFileUpload = (options) => {
2933
2989
  return (options.client ?? client_gen_1.client).post({
2934
2990
  security: [
2935
2991
  {
@@ -2941,7 +2997,7 @@ const ingestTables = (options) => {
2941
2997
  type: 'http'
2942
2998
  }
2943
2999
  ],
2944
- url: '/v1/graphs/{graph_id}/tables/ingest',
3000
+ url: '/v1/graphs/{graph_id}/files',
2945
3001
  ...options,
2946
3002
  headers: {
2947
3003
  'Content-Type': 'application/json',
@@ -2949,68 +3005,102 @@ const ingestTables = (options) => {
2949
3005
  }
2950
3006
  });
2951
3007
  };
2952
- exports.ingestTables = ingestTables;
3008
+ exports.createFileUpload = createFileUpload;
2953
3009
  /**
2954
- * Query Staging Tables with SQL
2955
- * Execute SQL queries on DuckDB staging tables for data inspection and validation.
3010
+ * Delete File
3011
+ * Delete file from all layers.
2956
3012
  *
2957
- * Query raw staging data directly with SQL before ingestion into the graph database.
2958
- * Useful for data quality checks, validation, and exploratory analysis.
3013
+ * Remove file from S3, database tracking, and optionally from DuckDB and graph.
3014
+ * Files are deleted by file_id, independent of table context.
2959
3015
  *
2960
- * **Security Best Practice - Use Parameterized Queries:**
2961
- * ALWAYS use query parameters instead of string concatenation to prevent SQL injection:
2962
- * - ✅ SAFE: `SELECT * FROM Entity WHERE type = ? LIMIT ?` with `parameters: ["Company", 100]`
2963
- * - ❌ UNSAFE: `SELECT * FROM Entity WHERE type = 'Company' LIMIT 100` with user input concatenated into SQL string
3016
+ * **Query Parameters:**
3017
+ * - `cascade` (optional, default=false): Delete from all layers including DuckDB
2964
3018
  *
2965
- * Query parameters provide automatic escaping and type safety. Use `?` placeholders with parameters array.
3019
+ * **What Happens (cascade=false):**
3020
+ * 1. File deleted from S3
3021
+ * 2. Database record removed
3022
+ * 3. Table statistics updated
2966
3023
  *
2967
- * **Use Cases:**
2968
- * - Validate data quality before graph ingestion
2969
- * - Inspect row-level data for debugging
2970
- * - Run analytics on staging tables
2971
- * - Check for duplicates, nulls, or data issues
2972
- * - Preview data transformations
3024
+ * **What Happens (cascade=true):**
3025
+ * 1. File data deleted from all DuckDB tables (by file_id)
3026
+ * 2. Graph marked as stale
3027
+ * 3. File deleted from S3
3028
+ * 4. Database record removed
3029
+ * 5. Table statistics updated
2973
3030
  *
2974
- * **Workflow:**
2975
- * 1. Upload data files via `POST /tables/{table_name}/files`
2976
- * 2. Query staging tables to validate: `POST /tables/query`
2977
- * 3. Fix any data issues by re-uploading
2978
- * 4. Ingest validated data: `POST /tables/ingest`
3031
+ * **Use Cases:**
3032
+ * - Remove incorrect or duplicate files
3033
+ * - Clean up failed uploads
3034
+ * - Delete files before graph ingestion
3035
+ * - Surgical data removal with cascade
2979
3036
  *
2980
- * **Supported SQL:**
2981
- * - Full DuckDB SQL syntax
2982
- * - SELECT, JOIN, WHERE, GROUP BY, ORDER BY
2983
- * - Aggregations, window functions, CTEs
2984
- * - Multiple table joins across staging area
3037
+ * **Security:**
3038
+ * - Write access required
3039
+ * - Shared repositories block deletions
3040
+ * - Full audit trail
2985
3041
  *
2986
- * **Common Operations:**
2987
- * - Count rows: `SELECT COUNT(*) FROM Entity`
2988
- * - Filter by type: `SELECT * FROM Entity WHERE entity_type = ? LIMIT ?` with `parameters: ["Company", 100]`
2989
- * - Check for nulls: `SELECT * FROM Entity WHERE name IS NULL LIMIT 10`
2990
- * - Find duplicates: `SELECT identifier, COUNT(*) as cnt FROM Entity GROUP BY identifier HAVING COUNT(*) > 1`
2991
- * - Filter amounts: `SELECT * FROM Transaction WHERE amount > ? AND date >= ?` with `parameters: [1000, "2024-01-01"]`
3042
+ * **Important:**
3043
+ * - Use cascade=true for immediate DuckDB cleanup
3044
+ * - Graph rebuild recommended after cascade deletion
3045
+ * - File deletion is included - no credit consumption
3046
+ */
3047
+ const deleteFile = (options) => {
3048
+ return (options.client ?? client_gen_1.client).delete({
3049
+ security: [
3050
+ {
3051
+ name: 'X-API-Key',
3052
+ type: 'apiKey'
3053
+ },
3054
+ {
3055
+ scheme: 'bearer',
3056
+ type: 'http'
3057
+ }
3058
+ ],
3059
+ url: '/v1/graphs/{graph_id}/files/{file_id}',
3060
+ ...options
3061
+ });
3062
+ };
3063
+ exports.deleteFile = deleteFile;
3064
+ /**
3065
+ * Get File Information
3066
+ * Get detailed information about a specific file.
2992
3067
  *
2993
- * **Limits:**
2994
- * - Query timeout: 30 seconds
2995
- * - Result limit: 10,000 rows (use LIMIT clause)
2996
- * - Read-only: No INSERT, UPDATE, DELETE
2997
- * - User's tables only: Cannot query other users' data
3068
+ * Retrieve comprehensive metadata for a single file by file_id, independent of
3069
+ * table context. Files are first-class resources with complete lifecycle tracking.
2998
3070
  *
2999
- * **Subgraph Support:**
3000
- * This endpoint accepts both parent graph IDs and subgraph IDs.
3001
- * - Parent graph: Use `graph_id` like `kg0123456789abcdef`
3002
- * - Subgraph: Use full subgraph ID like `kg0123456789abcdef_dev`
3003
- * Each subgraph has its own independent staging tables.
3071
+ * **Returned Information:**
3072
+ * - File ID, name, format, size
3073
+ * - Upload status and timestamps
3074
+ * - **Enhanced Multi-Layer Status** (new in this version):
3075
+ * - S3 layer: upload_status, uploaded_at, size_bytes, row_count
3076
+ * - DuckDB layer: duckdb_status, duckdb_staged_at, duckdb_row_count
3077
+ * - Graph layer: graph_status, graph_ingested_at
3078
+ * - Table association
3079
+ * - S3 location
3080
+ *
3081
+ * **Multi-Layer Pipeline Visibility:**
3082
+ * The `layers` object provides independent status tracking across the three-tier
3083
+ * data pipeline:
3084
+ * - **S3 (Immutable Source)**: File upload and validation
3085
+ * - **DuckDB (Mutable Staging)**: Immediate queryability with file provenance
3086
+ * - **Graph (Immutable View)**: Optional graph database materialization
3087
+ *
3088
+ * Each layer shows its own status, timestamp, and row count (where applicable),
3089
+ * enabling precise debugging and monitoring of the data ingestion flow.
3004
3090
  *
3005
- * **Shared Repositories:**
3006
- * Shared repositories (SEC, etc.) do not allow direct SQL queries.
3007
- * Use the graph query endpoint instead: `POST /v1/graphs/{graph_id}/query`
3091
+ * **Use Cases:**
3092
+ * - Validate file upload completion
3093
+ * - Monitor multi-layer ingestion progress in real-time
3094
+ * - Debug upload or staging issues at specific layers
3095
+ * - Verify file metadata and row counts
3096
+ * - Track file provenance through the pipeline
3097
+ * - Identify bottlenecks in the ingestion process
3008
3098
  *
3009
3099
  * **Note:**
3010
- * Staging table queries are included - no credit consumption
3100
+ * File info retrieval is included - no credit consumption
3011
3101
  */
3012
- const queryTables = (options) => {
3013
- return (options.client ?? client_gen_1.client).post({
3102
+ const getFile = (options) => {
3103
+ return (options.client ?? client_gen_1.client).get({
3014
3104
  security: [
3015
3105
  {
3016
3106
  name: 'X-API-Key',
@@ -3021,7 +3111,53 @@ const queryTables = (options) => {
3021
3111
  type: 'http'
3022
3112
  }
3023
3113
  ],
3024
- url: '/v1/graphs/{graph_id}/tables/query',
3114
+ url: '/v1/graphs/{graph_id}/files/{file_id}',
3115
+ ...options
3116
+ });
3117
+ };
3118
+ exports.getFile = getFile;
3119
+ /**
3120
+ * Update File Status
3121
+ * Update file status and trigger processing.
3122
+ *
3123
+ * Update file status after upload completion. Setting status='uploaded' triggers
3124
+ * immediate DuckDB staging and optional graph ingestion.
3125
+ *
3126
+ * **Request Body:**
3127
+ * - `status`: New status (uploaded, disabled, failed)
3128
+ * - `ingest_to_graph` (optional): If true, auto-ingest to graph after DuckDB staging
3129
+ *
3130
+ * **What Happens (status='uploaded'):**
3131
+ * 1. File validated in S3
3132
+ * 2. Row count calculated
3133
+ * 3. DuckDB staging triggered immediately (Celery task)
3134
+ * 4. If ingest_to_graph=true, graph ingestion queued
3135
+ * 5. File queryable in DuckDB within seconds
3136
+ *
3137
+ * **Use Cases:**
3138
+ * - Signal upload completion
3139
+ * - Trigger immediate DuckDB staging
3140
+ * - Enable/disable files
3141
+ * - Mark failed uploads
3142
+ *
3143
+ * **Important:**
3144
+ * - Files must exist in S3 before marking uploaded
3145
+ * - DuckDB staging happens asynchronously
3146
+ * - Graph ingestion is optional (ingest_to_graph flag)
3147
+ */
3148
+ const updateFile = (options) => {
3149
+ return (options.client ?? client_gen_1.client).patch({
3150
+ security: [
3151
+ {
3152
+ name: 'X-API-Key',
3153
+ type: 'apiKey'
3154
+ },
3155
+ {
3156
+ scheme: 'bearer',
3157
+ type: 'http'
3158
+ }
3159
+ ],
3160
+ url: '/v1/graphs/{graph_id}/files/{file_id}',
3025
3161
  ...options,
3026
3162
  headers: {
3027
3163
  'Content-Type': 'application/json',
@@ -3029,7 +3165,7 @@ const queryTables = (options) => {
3029
3165
  }
3030
3166
  });
3031
3167
  };
3032
- exports.queryTables = queryTables;
3168
+ exports.updateFile = updateFile;
3033
3169
  /**
3034
3170
  * Get User Graphs and Repositories
3035
3171
  * List all graph databases and shared repositories accessible to the current user.