@pgflow/core 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{CHANGELOG.md → dist/CHANGELOG.md} +6 -0
- package/package.json +8 -5
- package/__tests__/mocks/index.ts +0 -1
- package/__tests__/mocks/postgres.ts +0 -37
- package/__tests__/types/PgflowSqlClient.test-d.ts +0 -59
- package/docs/options_for_flow_and_steps.md +0 -75
- package/docs/pgflow-blob-reference-system.md +0 -179
- package/eslint.config.cjs +0 -22
- package/example-flow.mermaid +0 -5
- package/example-flow.svg +0 -1
- package/flow-lifecycle.mermaid +0 -83
- package/flow-lifecycle.svg +0 -1
- package/out-tsc/vitest/__tests__/mocks/index.d.ts +0 -2
- package/out-tsc/vitest/__tests__/mocks/index.d.ts.map +0 -1
- package/out-tsc/vitest/__tests__/mocks/postgres.d.ts +0 -15
- package/out-tsc/vitest/__tests__/mocks/postgres.d.ts.map +0 -1
- package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts +0 -2
- package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts.map +0 -1
- package/out-tsc/vitest/tsconfig.spec.tsbuildinfo +0 -1
- package/out-tsc/vitest/vite.config.d.ts +0 -3
- package/out-tsc/vitest/vite.config.d.ts.map +0 -1
- package/pkgs/core/dist/index.js +0 -54
- package/pkgs/core/dist/pkgs/core/LICENSE.md +0 -660
- package/pkgs/core/dist/pkgs/core/README.md +0 -373
- package/pkgs/dsl/dist/index.js +0 -123
- package/pkgs/dsl/dist/pkgs/dsl/README.md +0 -11
- package/pkgs/edge-worker/dist/index.js +0 -953
- package/pkgs/edge-worker/dist/index.js.map +0 -7
- package/pkgs/edge-worker/dist/pkgs/edge-worker/LICENSE.md +0 -660
- package/pkgs/edge-worker/dist/pkgs/edge-worker/README.md +0 -46
- package/pkgs/example-flows/dist/index.js +0 -152
- package/pkgs/example-flows/dist/pkgs/example-flows/README.md +0 -11
- package/project.json +0 -125
- package/prompts/architect.md +0 -87
- package/prompts/condition.md +0 -33
- package/prompts/declarative_sql.md +0 -15
- package/prompts/deps_in_payloads.md +0 -20
- package/prompts/dsl-multi-arg.ts +0 -48
- package/prompts/dsl-options.md +0 -39
- package/prompts/dsl-single-arg.ts +0 -51
- package/prompts/dsl-two-arg.ts +0 -61
- package/prompts/dsl.md +0 -119
- package/prompts/fanout_steps.md +0 -1
- package/prompts/json_schemas.md +0 -36
- package/prompts/one_shot.md +0 -286
- package/prompts/pgtap.md +0 -229
- package/prompts/sdk.md +0 -59
- package/prompts/step_types.md +0 -62
- package/prompts/versioning.md +0 -16
- package/queries/fail_permanently.sql +0 -17
- package/queries/fail_task.sql +0 -21
- package/queries/sequential.sql +0 -47
- package/queries/two_roots_left_right.sql +0 -59
- package/schema.svg +0 -1
- package/scripts/colorize-pgtap-output.awk +0 -72
- package/scripts/run-test-with-colors +0 -5
- package/scripts/watch-test +0 -7
- package/src/PgflowSqlClient.ts +0 -85
- package/src/database-types.ts +0 -759
- package/src/index.ts +0 -3
- package/src/types.ts +0 -103
- package/supabase/config.toml +0 -32
- package/supabase/seed.sql +0 -202
- package/supabase/tests/add_step/basic_step_addition.test.sql +0 -29
- package/supabase/tests/add_step/circular_dependency.test.sql +0 -21
- package/supabase/tests/add_step/flow_isolation.test.sql +0 -26
- package/supabase/tests/add_step/idempotent_step_addition.test.sql +0 -20
- package/supabase/tests/add_step/invalid_step_slug.test.sql +0 -16
- package/supabase/tests/add_step/nonexistent_dependency.test.sql +0 -16
- package/supabase/tests/add_step/nonexistent_flow.test.sql +0 -13
- package/supabase/tests/add_step/options.test.sql +0 -66
- package/supabase/tests/add_step/step_with_dependency.test.sql +0 -36
- package/supabase/tests/add_step/step_with_multiple_dependencies.test.sql +0 -46
- package/supabase/tests/complete_task/archives_message.test.sql +0 -67
- package/supabase/tests/complete_task/completes_run_if_no_more_remaining_steps.test.sql +0 -62
- package/supabase/tests/complete_task/completes_task_and_updates_dependents.test.sql +0 -64
- package/supabase/tests/complete_task/decrements_remaining_steps_if_completing_step.test.sql +0 -62
- package/supabase/tests/complete_task/saves_output_when_completing_run.test.sql +0 -57
- package/supabase/tests/create_flow/flow_creation.test.sql +0 -27
- package/supabase/tests/create_flow/idempotency_and_duplicates.test.sql +0 -26
- package/supabase/tests/create_flow/invalid_slug.test.sql +0 -13
- package/supabase/tests/create_flow/options.test.sql +0 -57
- package/supabase/tests/fail_task/exponential_backoff.test.sql +0 -70
- package/supabase/tests/fail_task/mark_as_failed_if_no_retries_available.test.sql +0 -49
- package/supabase/tests/fail_task/respects_flow_retry_settings.test.sql +0 -48
- package/supabase/tests/fail_task/respects_step_retry_settings.test.sql +0 -48
- package/supabase/tests/fail_task/retry_task_if_retries_available.test.sql +0 -39
- package/supabase/tests/is_valid_slug.test.sql +0 -72
- package/supabase/tests/poll_for_tasks/builds_proper_input_from_deps_outputs.test.sql +0 -35
- package/supabase/tests/poll_for_tasks/hides_messages.test.sql +0 -35
- package/supabase/tests/poll_for_tasks/increments_attempts_count.test.sql +0 -35
- package/supabase/tests/poll_for_tasks/multiple_task_processing.test.sql +0 -24
- package/supabase/tests/poll_for_tasks/polls_only_queued_tasks.test.sql +0 -35
- package/supabase/tests/poll_for_tasks/reads_messages.test.sql +0 -38
- package/supabase/tests/poll_for_tasks/returns_no_tasks_if_no_step_task_for_message.test.sql +0 -34
- package/supabase/tests/poll_for_tasks/returns_no_tasks_if_queue_is_empty.test.sql +0 -19
- package/supabase/tests/poll_for_tasks/returns_no_tasks_when_qty_set_to_0.test.sql +0 -22
- package/supabase/tests/poll_for_tasks/sets_vt_delay_based_on_opt_timeout.test.sql +0 -41
- package/supabase/tests/poll_for_tasks/tasks_reapppear_if_not_processed_in_time.test.sql +0 -59
- package/supabase/tests/start_flow/creates_run.test.sql +0 -24
- package/supabase/tests/start_flow/creates_step_states_for_all_steps.test.sql +0 -25
- package/supabase/tests/start_flow/creates_step_tasks_only_for_root_steps.test.sql +0 -54
- package/supabase/tests/start_flow/returns_run.test.sql +0 -24
- package/supabase/tests/start_flow/sends_messages_on_the_queue.test.sql +0 -50
- package/supabase/tests/start_flow/starts_only_root_steps.test.sql +0 -21
- package/supabase/tests/step_dsl_is_idempotent.test.sql +0 -34
- package/tsconfig.json +0 -16
- package/tsconfig.lib.json +0 -26
- package/tsconfig.spec.json +0 -35
- package/vite.config.ts +0 -57
package/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pgflow/core",
|
|
3
|
-
"version": "0.0.5",
|
|
3
|
+
"version": "0.0.7",
|
|
4
4
|
"license": "AGPL-3.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
|
-
"
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"supabase/migrations/*.sql"
|
|
12
|
+
],
|
|
8
13
|
"private": false,
|
|
9
14
|
"exports": {
|
|
10
15
|
"./package.json": "./package.json",
|
|
@@ -13,15 +18,13 @@
|
|
|
13
18
|
"import": "./dist/index.js"
|
|
14
19
|
}
|
|
15
20
|
},
|
|
16
|
-
"types": "./dist/index.d.ts",
|
|
17
|
-
"module": "./dist/index.js",
|
|
18
21
|
"devDependencies": {
|
|
19
22
|
"supabase": "^2.6.8",
|
|
20
23
|
"vitest": "*"
|
|
21
24
|
},
|
|
22
25
|
"dependencies": {
|
|
23
26
|
"postgres": "^3.4.5",
|
|
24
|
-
"@pgflow/dsl": "^0.0.
|
|
27
|
+
"@pgflow/dsl": "^0.0.7"
|
|
25
28
|
},
|
|
26
29
|
"publishConfig": {
|
|
27
30
|
"access": "public"
|
package/__tests__/mocks/index.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from './postgres.ts';
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import { vi } from 'vitest';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Mock implementation for the postgres module
|
|
5
|
-
* This prevents real database connections during tests
|
|
6
|
-
*/
|
|
7
|
-
export function setupPostgresMock() {
|
|
8
|
-
// Create a properly typed SQL client with methods
|
|
9
|
-
type SqlClient = {
|
|
10
|
-
(strings: TemplateStringsArray, ...values: unknown[]): Promise<unknown[]>;
|
|
11
|
-
json: (data: unknown) => string;
|
|
12
|
-
begin: () => Promise<void>;
|
|
13
|
-
commit: () => Promise<void>;
|
|
14
|
-
rollback: () => Promise<void>;
|
|
15
|
-
end: () => Promise<void>;
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
// Create the main postgres function that returns the SQL client
|
|
19
|
-
const sql = vi.fn(() => {
|
|
20
|
-
// Create a complete mock SQL client with all required methods
|
|
21
|
-
const sqlClient = Object.assign(
|
|
22
|
-
vi.fn(() => Promise.resolve(['empty response'])),
|
|
23
|
-
{
|
|
24
|
-
json: vi.fn((data: unknown) => JSON.stringify(data)),
|
|
25
|
-
begin: vi.fn(() => Promise.resolve()),
|
|
26
|
-
commit: vi.fn(() => Promise.resolve()),
|
|
27
|
-
rollback: vi.fn(() => Promise.resolve()),
|
|
28
|
-
end: vi.fn(() => Promise.resolve()),
|
|
29
|
-
}
|
|
30
|
-
) as SqlClient;
|
|
31
|
-
|
|
32
|
-
return sqlClient;
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
// Return an object with a default key since postgres is imported as a default export
|
|
36
|
-
return { default: sql };
|
|
37
|
-
}
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import { describe, it, expectTypeOf, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { setupPostgresMock } from '../mocks/postgres.ts';
|
|
3
|
-
|
|
4
|
-
// Mock the postgres module so that it never makes a real connection.
|
|
5
|
-
// This must come before the postgres import
|
|
6
|
-
vi.mock('postgres', () => {
|
|
7
|
-
return setupPostgresMock();
|
|
8
|
-
});
|
|
9
|
-
|
|
10
|
-
import { PgflowSqlClient } from '../../src/PgflowSqlClient.ts';
|
|
11
|
-
import type { Json, StepTaskKey } from '../../src/types.ts';
|
|
12
|
-
import postgres from 'postgres';
|
|
13
|
-
import { Flow } from '@pgflow/dsl';
|
|
14
|
-
|
|
15
|
-
describe('PgflowSqlClient Type Compatibility with Flow', () => {
|
|
16
|
-
beforeEach(() => {
|
|
17
|
-
vi.clearAllMocks();
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it('should properly type IPgflowClient methods', () => {
|
|
21
|
-
// Arrange
|
|
22
|
-
const sql = postgres();
|
|
23
|
-
const flow = new Flow<{ url: string }>({ slug: 'test_flow' });
|
|
24
|
-
const client = new PgflowSqlClient<typeof flow>(sql);
|
|
25
|
-
|
|
26
|
-
// Check pollForTasks method types
|
|
27
|
-
expectTypeOf(client.pollForTasks).toBeFunction();
|
|
28
|
-
expectTypeOf(client.pollForTasks).parameters.toMatchTypeOf<
|
|
29
|
-
[string, number?, number?, number?, number?]
|
|
30
|
-
>();
|
|
31
|
-
|
|
32
|
-
// Check completeTask method types
|
|
33
|
-
expectTypeOf(client.completeTask).toBeFunction();
|
|
34
|
-
expectTypeOf(client.completeTask).parameters.toMatchTypeOf<
|
|
35
|
-
[StepTaskKey, Json?]
|
|
36
|
-
>();
|
|
37
|
-
|
|
38
|
-
// Check failTask method types
|
|
39
|
-
expectTypeOf(client.failTask).toBeFunction();
|
|
40
|
-
expectTypeOf(client.failTask).parameters.toMatchTypeOf<
|
|
41
|
-
[StepTaskKey, unknown]
|
|
42
|
-
>();
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
it('allows only valid Flow input', () => {
|
|
46
|
-
const sql = postgres();
|
|
47
|
-
const flow = new Flow<{ url: string }>({ slug: 'test_flow' });
|
|
48
|
-
const client = new PgflowSqlClient<typeof flow>(sql);
|
|
49
|
-
|
|
50
|
-
// @ts-expect-error - Flow expects { url: string } not a number
|
|
51
|
-
client.startFlow(flow, 23);
|
|
52
|
-
|
|
53
|
-
// @ts-expect-error - Flow expects { url: string }
|
|
54
|
-
client.startFlow(flow, { url: 23 });
|
|
55
|
-
|
|
56
|
-
// @ts-expect-error - Flow does not accept extraneous keys
|
|
57
|
-
client.startFlow(flow, { url: 'string', extraneousKey: 'value' });
|
|
58
|
-
});
|
|
59
|
-
});
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
:::
|
|
2
|
-
|
|
3
|
-
1. MVP will allow only 1:1 queue:flow mappings, because it simplifies a lot.
|
|
4
|
-
Queue is created when creating a flow, in pgflow.create_flow() function.
|
|
5
|
-
|
|
6
|
-
2. No, we will start with either static delay or simple exponential and
|
|
7
|
-
I do not plan to expand on it further.
|
|
8
|
-
|
|
9
|
-
The backoff will be calculated from the retry attempts, which can be deduced
|
|
10
|
-
from pgmq's "read_ct" counter.
|
|
11
|
-
|
|
12
|
-
No need for jitter for now.
|
|
13
|
-
|
|
14
|
-
3. **Execution Timeouts**
|
|
15
|
-
I'm not sure about flow timeouts, I think for MVP we should probably
|
|
16
|
-
skip them and I'm not even sure if I want to have them for the steps too.
|
|
17
|
-
|
|
18
|
-
I would be definitely adding them in future.
|
|
19
|
-
|
|
20
|
-
4. We want everything to be statically typed in TypeScript, because the main
|
|
21
|
-
way to define flows would be to use TS DSL.
|
|
22
|
-
|
|
23
|
-
So a condition must be a JSON-serializable object that will get saved
|
|
24
|
-
in a JSONB column.
|
|
25
|
-
|
|
26
|
-
My initial idea for conditions was to just provide a JSON object that we
|
|
27
|
-
will use to perform containment check on the step inputs using @> operator.
|
|
28
|
-
|
|
29
|
-
I am considering expanding it to more robust condition, but they would need
|
|
30
|
-
to be defined in JSON-serializable way.
|
|
31
|
-
|
|
32
|
-
I do not want to have any SQL snippets in conditions, because I cannot
|
|
33
|
-
statically type them and they can fail at runtime.
|
|
34
|
-
|
|
35
|
-
No, conditions should probably only be able to reference step inputs.
|
|
36
|
-
|
|
37
|
-
5. Yes it should be able to disable retries per step or override their params.
|
|
38
|
-
How those circuit breakers should work and what would be the benefit of them?
|
|
39
|
-
What is retry budget? Do we have a distributed system really? Everything
|
|
40
|
-
lives in a single postgres instance.
|
|
41
|
-
|
|
42
|
-
6. No we do not want to support dependency resolution at runtime,
|
|
43
|
-
but conditions could be used to implement something similar.
|
|
44
|
-
Steps should not be able to add new steps during runtime,
|
|
45
|
-
but I plan to have fanout steps that will spawn either multiple tasks
|
|
46
|
-
or multiple subflows, one per the input array item.
|
|
47
|
-
Those are meant to be aggregated back to the output array when completed.
|
|
48
|
-
|
|
49
|
-
It's not for MVP tho!!
|
|
50
|
-
|
|
51
|
-
### Cross-Cutting Concerns
|
|
52
|
-
7. We will advise users to not put anything sensitive into flow options,
|
|
53
|
-
by writing docs and also not giving handlers any ability to access the
|
|
54
|
-
step options.
|
|
55
|
-
|
|
56
|
-
We will provide a Context object that users can define in the Flow DSL,
|
|
57
|
-
that will be passed to the step handlers at runtime and will encourage
|
|
58
|
-
users to use this Context object to store sensitive data.
|
|
59
|
-
|
|
60
|
-
8. What do you mean by tracked metadata for options and options affecting metrics?
|
|
61
|
-
|
|
62
|
-
9. Versioning is a big problem for my project - I have a few ideas for how to solve
|
|
63
|
-
it (basically topologically sort graph and hash it to create a version hash).
|
|
64
|
-
But for MVP we definitely don't need versioning - users must take care of
|
|
65
|
-
this on their own by just creating new flows if they change the shape of
|
|
66
|
-
the flow. There is no way and will be no way to UPDATE flows.
|
|
67
|
-
|
|
68
|
-
Graph shape should be immutable after creation, but i'm not sure about
|
|
69
|
-
the options - maybe it would be a good idea to allow updating retry configs,
|
|
70
|
-
because only running the flows in production can allow users to gather
|
|
71
|
-
enough data to make educated decisions about those params.
|
|
72
|
-
|
|
73
|
-
10. No modification for flows/steps at all, so easy.
|
|
74
|
-
|
|
75
|
-
:::
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
# PgFlow Blob Reference System
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
PgFlow needs an efficient way to handle large data outputs from workflow steps. The Blob Reference System provides a solution by separating large data payloads from workflow control information while maintaining a seamless developer experience.
|
|
6
|
-
|
|
7
|
-
## How It Works
|
|
8
|
-
|
|
9
|
-
### Core Concept
|
|
10
|
-
|
|
11
|
-
When steps produce large outputs (e.g., HTML content from web scraping, binary data, large API responses), these outputs are stored separately in a dedicated blob storage table. The workflow state maintains references to these blobs rather than storing the actual large data.
|
|
12
|
-
|
|
13
|
-
### Database Structure
|
|
14
|
-
|
|
15
|
-
The system uses a dedicated table for blob storage:
|
|
16
|
-
|
|
17
|
-
```sql
|
|
18
|
-
CREATE TABLE pgflow.output_blobs (
|
|
19
|
-
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
20
|
-
content JSONB NOT NULL,
|
|
21
|
-
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
22
|
-
);
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
### Worker Task Structure
|
|
26
|
-
|
|
27
|
-
The `poll_for_tasks` function returns tasks with both regular inputs and blob references through a custom type:
|
|
28
|
-
|
|
29
|
-
```sql
|
|
30
|
-
CREATE TYPE pgflow.step_task_record AS (
|
|
31
|
-
flow_slug TEXT,
|
|
32
|
-
run_id UUID,
|
|
33
|
-
step_slug TEXT,
|
|
34
|
-
input JSONB,
|
|
35
|
-
blobs_refs JSONB
|
|
36
|
-
);
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
This design provides a clean separation between:
|
|
40
|
-
|
|
41
|
-
- `input`: Regular small data that can be directly included in the task
|
|
42
|
-
- `blobs_refs`: References to large data stored separately in the blob table
|
|
43
|
-
|
|
44
|
-
### Example Return Value
|
|
45
|
-
|
|
46
|
-
A task returned by `poll_for_tasks` might look like:
|
|
47
|
-
|
|
48
|
-
```json
|
|
49
|
-
{
|
|
50
|
-
"flow_slug": "my_flow",
|
|
51
|
-
"run_id": "1234-5678-90ab-cdef",
|
|
52
|
-
"step_slug": "my_step",
|
|
53
|
-
"input": {
|
|
54
|
-
"run": "run input",
|
|
55
|
-
"dependency_a": "dependency_a output"
|
|
56
|
-
},
|
|
57
|
-
"blobs_refs": {
|
|
58
|
-
"dependency_b": "<uuid to the blob saved for dependency_b which returned binary data>"
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
In this example:
|
|
64
|
-
|
|
65
|
-
- `dependency_a` had a small output that's included directly in the `input` object
|
|
66
|
-
- `dependency_b` had a large output (possibly binary data) that's stored as a blob, with only a reference included
|
|
67
|
-
|
|
68
|
-
### Queue Efficiency
|
|
69
|
-
|
|
70
|
-
A critical optimization in PgFlow is that the task queue only stores minimal task identification information:
|
|
71
|
-
|
|
72
|
-
- flow_slug
|
|
73
|
-
- run_id
|
|
74
|
-
- step_slug
|
|
75
|
-
- task_index
|
|
76
|
-
|
|
77
|
-
This lightweight approach keeps queue messages small and efficient. When a worker picks up a task, it uses these identifiers to:
|
|
78
|
-
|
|
79
|
-
1. Call `poll_for_tasks` to get the full task data
|
|
80
|
-
2. Receive both the regular `input` and `blobs_refs` in a single query result
|
|
81
|
-
3. Fetch the actual blob content for any referenced blobs
|
|
82
|
-
4. Combine all data to form the complete input for the task handler
|
|
83
|
-
|
|
84
|
-
## Implementation Flow
|
|
85
|
-
|
|
86
|
-
### Task Creation
|
|
87
|
-
|
|
88
|
-
1. When a step completes, its output is analyzed:
|
|
89
|
-
|
|
90
|
-
- Outputs below the size threshold remain in the regular output JSONB
|
|
91
|
-
- Large outputs are stored in the `pgflow.output_blobs` table with a unique ID
|
|
92
|
-
|
|
93
|
-
2. The `start_ready_steps` function:
|
|
94
|
-
- Creates task entries with references to any large blob data
|
|
95
|
-
- Enqueues only the task identifiers (not the actual data) in the task queue
|
|
96
|
-
|
|
97
|
-
### Task Execution
|
|
98
|
-
|
|
99
|
-
1. Worker picks up the task identifier from the queue
|
|
100
|
-
2. Worker calls `poll_for_tasks` to get the task details
|
|
101
|
-
3. `poll_for_tasks` returns:
|
|
102
|
-
- The `input` object with regular data
|
|
103
|
-
- The `blobs_refs` object with references to any large data outputs
|
|
104
|
-
4. Worker fetches blob content for any references in `blobs_refs`
|
|
105
|
-
5. Worker assembles the complete input (combining regular data and blob data) for the task handler
|
|
106
|
-
6. Task handler executes with the complete data, unaware of the blob reference system
|
|
107
|
-
|
|
108
|
-
### Example Processing Flow
|
|
109
|
-
|
|
110
|
-
For a web scraping workflow:
|
|
111
|
-
|
|
112
|
-
1. `fetch-html` step returns a large HTML string (3MB)
|
|
113
|
-
2. System detects the large output and:
|
|
114
|
-
- Stores HTML in `pgflow.output_blobs` with ID "abc-123"
|
|
115
|
-
- Records only the blob reference in the step's output
|
|
116
|
-
3. When `parse-html` step is ready to run:
|
|
117
|
-
- Queue contains only the task identifier
|
|
118
|
-
- `poll_for_tasks` returns the task with:
|
|
119
|
-
```json
|
|
120
|
-
{
|
|
121
|
-
"input": {
|
|
122
|
-
"run": { "url": "https://example.com" }
|
|
123
|
-
},
|
|
124
|
-
"blobs_refs": {
|
|
125
|
-
"fetch-html": "abc-123"
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
```
|
|
129
|
-
4. Worker:
|
|
130
|
-
- Detects the blob reference "abc-123" for "fetch-html"
|
|
131
|
-
- Fetches the actual HTML content from the blob table
|
|
132
|
-
- Provides the handler with complete input including the HTML content
|
|
133
|
-
|
|
134
|
-
## Developer Experience
|
|
135
|
-
|
|
136
|
-
From a workflow developer's perspective, the blob reference system is completely transparent:
|
|
137
|
-
|
|
138
|
-
```typescript
|
|
139
|
-
// Developer writes code as if all data is directly available
|
|
140
|
-
const parseHtmlHandler: StepHandler<ParseInput, ParseOutput> = async (
|
|
141
|
-
input
|
|
142
|
-
) => {
|
|
143
|
-
// input.dependencies["fetch-html"] contains the full HTML content
|
|
144
|
-
// (the blob reference was automatically resolved)
|
|
145
|
-
const html = input.dependencies['fetch-html'];
|
|
146
|
-
|
|
147
|
-
// Process the HTML...
|
|
148
|
-
const title = extractTitle(html);
|
|
149
|
-
const links = extractLinks(html);
|
|
150
|
-
|
|
151
|
-
return { title, links };
|
|
152
|
-
};
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
The developer never needs to:
|
|
156
|
-
|
|
157
|
-
- Manually resolve blob references
|
|
158
|
-
- Check if data is a reference or actual content
|
|
159
|
-
- Handle storage of large outputs differently
|
|
160
|
-
|
|
161
|
-
## Benefits and Considerations
|
|
162
|
-
|
|
163
|
-
### Benefits
|
|
164
|
-
|
|
165
|
-
1. **Database Efficiency**: Large data is stored separately from workflow metadata
|
|
166
|
-
2. **Queue Performance**: Queue messages remain small and consistent in size
|
|
167
|
-
3. **Separation of Concerns**: Control flow data is separate from large payloads
|
|
168
|
-
4. **Transparent to Developers**: No special code required to handle large data
|
|
169
|
-
5. **Scalability**: Can handle arbitrary data sizes without affecting workflow system performance
|
|
170
|
-
|
|
171
|
-
### Considerations
|
|
172
|
-
|
|
173
|
-
1. **Query Optimization**: Ensure `poll_for_tasks` efficiently retrieves both regular data and blob references
|
|
174
|
-
2. **Blob Lifecycle Management**: Implement cleanup for orphaned or expired blobs
|
|
175
|
-
3. **Size Threshold Tuning**: Configure appropriate thresholds for when data should use blob storage
|
|
176
|
-
|
|
177
|
-
## Conclusion
|
|
178
|
-
|
|
179
|
-
The Blob Reference System in PgFlow provides an elegant solution for handling large data in workflows. By splitting task data into regular inputs and blob references, the system maintains efficient database usage and queue performance while providing a seamless experience for workflow developers. The design ensures that large data is handled appropriately without requiring developers to write special code for blob resolution or storage.
|
package/eslint.config.cjs
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
const baseConfig = require('../../eslint.config.cjs');
|
|
2
|
-
|
|
3
|
-
module.exports = [
|
|
4
|
-
...baseConfig,
|
|
5
|
-
{
|
|
6
|
-
files: ['**/*.json'],
|
|
7
|
-
rules: {
|
|
8
|
-
'@nx/dependency-checks': [
|
|
9
|
-
'error',
|
|
10
|
-
{
|
|
11
|
-
ignoredFiles: [
|
|
12
|
-
'{projectRoot}/eslint.config.{js,cjs,mjs}',
|
|
13
|
-
'{projectRoot}/vite.config.{js,ts,mjs,mts}',
|
|
14
|
-
],
|
|
15
|
-
},
|
|
16
|
-
],
|
|
17
|
-
},
|
|
18
|
-
languageOptions: {
|
|
19
|
-
parser: require('jsonc-eslint-parser'),
|
|
20
|
-
},
|
|
21
|
-
},
|
|
22
|
-
];
|
package/example-flow.mermaid
DELETED
package/example-flow.svg
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
<svg aria-roledescription="flowchart-v2" role="graphics-document document" viewBox="0 0 497.90625 174" style="max-width: 497.906px; background-color: white;" class="flowchart" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="100%" id="my-svg"><style>#my-svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;fill:#333;}#my-svg .error-icon{fill:#552222;}#my-svg .error-text{fill:#552222;stroke:#552222;}#my-svg .edge-thickness-normal{stroke-width:1px;}#my-svg .edge-thickness-thick{stroke-width:3.5px;}#my-svg .edge-pattern-solid{stroke-dasharray:0;}#my-svg .edge-thickness-invisible{stroke-width:0;fill:none;}#my-svg .edge-pattern-dashed{stroke-dasharray:3;}#my-svg .edge-pattern-dotted{stroke-dasharray:2;}#my-svg .marker{fill:#333333;stroke:#333333;}#my-svg .marker.cross{stroke:#333333;}#my-svg svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;}#my-svg p{margin:0;}#my-svg .label{font-family:"trebuchet ms",verdana,arial,sans-serif;color:#333;}#my-svg .cluster-label text{fill:#333;}#my-svg .cluster-label span{color:#333;}#my-svg .cluster-label span p{background-color:transparent;}#my-svg .label text,#my-svg span{fill:#333;color:#333;}#my-svg .node rect,#my-svg .node circle,#my-svg .node ellipse,#my-svg .node polygon,#my-svg .node path{fill:#ECECFF;stroke:#9370DB;stroke-width:1px;}#my-svg .rough-node .label text,#my-svg .node .label text,#my-svg .image-shape .label,#my-svg .icon-shape .label{text-anchor:middle;}#my-svg .node .katex path{fill:#000;stroke:#000;stroke-width:1px;}#my-svg .rough-node .label,#my-svg .node .label,#my-svg .image-shape .label,#my-svg .icon-shape .label{text-align:center;}#my-svg .node.clickable{cursor:pointer;}#my-svg .root .anchor path{fill:#333333!important;stroke-width:0;stroke:#333333;}#my-svg .arrowheadPath{fill:#333333;}#my-svg .edgePath .path{stroke:#333333;stroke-width:2.0px;}#my-svg .flowchart-link{stroke:#333333;fill:none;}#my-svg 
.edgeLabel{background-color:rgba(232,232,232, 0.8);text-align:center;}#my-svg .edgeLabel p{background-color:rgba(232,232,232, 0.8);}#my-svg .edgeLabel rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#my-svg .labelBkg{background-color:rgba(232, 232, 232, 0.5);}#my-svg .cluster rect{fill:#ffffde;stroke:#aaaa33;stroke-width:1px;}#my-svg .cluster text{fill:#333;}#my-svg .cluster span{color:#333;}#my-svg div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:12px;background:hsl(80, 100%, 96.2745098039%);border:1px solid #aaaa33;border-radius:2px;pointer-events:none;z-index:100;}#my-svg .flowchartTitleText{text-anchor:middle;font-size:18px;fill:#333;}#my-svg rect.text{fill:none;stroke-width:0;}#my-svg .icon-shape,#my-svg .image-shape{background-color:rgba(232,232,232, 0.8);text-align:center;}#my-svg .icon-shape p,#my-svg .image-shape p{background-color:rgba(232,232,232, 0.8);padding:2px;}#my-svg .icon-shape rect,#my-svg .image-shape rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#my-svg :root{--mermaid-font-family:"trebuchet ms",verdana,arial,sans-serif;}</style><g><marker orient="auto" markerHeight="8" markerWidth="8" markerUnits="userSpaceOnUse" refY="5" refX="5" viewBox="0 0 10 10" class="marker flowchart-v2" id="my-svg_flowchart-v2-pointEnd"><path style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 0 L 10 5 L 0 10 z"/></marker><marker orient="auto" markerHeight="8" markerWidth="8" markerUnits="userSpaceOnUse" refY="5" refX="4.5" viewBox="0 0 10 10" class="marker flowchart-v2" id="my-svg_flowchart-v2-pointStart"><path style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 5 L 10 10 L 10 0 z"/></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5" refX="11" viewBox="0 0 10 10" class="marker flowchart-v2" 
id="my-svg_flowchart-v2-circleEnd"><circle style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5" refX="-1" viewBox="0 0 10 10" class="marker flowchart-v2" id="my-svg_flowchart-v2-circleStart"><circle style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"/></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5.2" refX="12" viewBox="0 0 11 11" class="marker cross flowchart-v2" id="my-svg_flowchart-v2-crossEnd"><path style="stroke-width: 2; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 l 9,9 M 10,1 l -9,9"/></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5.2" refX="-1" viewBox="0 0 11 11" class="marker cross flowchart-v2" id="my-svg_flowchart-v2-crossStart"><path style="stroke-width: 2; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 l 9,9 M 10,1 l -9,9"/></marker><g class="root"><g class="clusters"/><g class="edgePaths"><path marker-end="url(#my-svg_flowchart-v2-pointEnd)" style="" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_A_B_0" d="M113.179,60L120.045,55.833C126.911,51.667,140.643,43.333,151.009,39.167C161.375,35,168.375,35,171.875,35L175.375,35"/><path marker-end="url(#my-svg_flowchart-v2-pointEnd)" style="" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_A_C_1" d="M113.179,114L120.045,118.167C126.911,122.333,140.643,130.667,151.308,134.833C161.974,139,169.573,139,173.372,139L177.172,139"/><path marker-end="url(#my-svg_flowchart-v2-pointEnd)" style="" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_B_D_2" 
d="M308.75,35L312.917,35C317.083,35,325.417,35,336.263,38.835C347.109,42.669,360.469,50.339,367.148,54.174L373.828,58.008"/><path marker-end="url(#my-svg_flowchart-v2-pointEnd)" style="" class="edge-thickness-normal edge-pattern-solid edge-thickness-normal edge-pattern-solid flowchart-link" id="L_C_D_3" d="M306.953,139L311.419,139C315.885,139,324.818,139,335.964,135.165C347.109,131.331,360.469,123.661,367.148,119.826L373.828,115.992"/></g><g class="edgeLabels"><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" class="labelBkg" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" class="labelBkg" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" class="labelBkg" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" class="labelBkg" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g></g><g class="nodes"><g transform="translate(68.6875, 87)" id="flowchart-A-0" class="node default"><rect height="54" width="121.375" y="-27" x="-60.6875" style="" class="basic label-container"/><g 
transform="translate(-30.6875, -12)" style="" class="label"><rect/><foreignObject height="24" width="61.375"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel"><p>website</p></span></div></foreignObject></g></g><g transform="translate(244.0625, 35)" id="flowchart-B-1" class="node default"><rect height="54" width="129.375" y="-27" x="-64.6875" style="" class="basic label-container"/><g transform="translate(-34.6875, -12)" style="" class="label"><rect/><foreignObject height="24" width="69.375"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel"><p>sentiment</p></span></div></foreignObject></g></g><g transform="translate(244.0625, 139)" id="flowchart-C-3" class="node default"><rect height="54" width="125.78125" y="-27" x="-62.890625" style="" class="basic label-container"/><g transform="translate(-32.890625, -12)" style="" class="label"><rect/><foreignObject height="24" width="65.78125"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel"><p>summary</p></span></div></foreignObject></g></g><g transform="translate(424.328125, 87)" id="flowchart-D-5" class="node default"><rect height="54" width="131.15625" y="-27" x="-65.578125" style="" class="basic label-container"/><g transform="translate(-35.578125, -12)" style="" class="label"><rect/><foreignObject height="24" width="71.15625"><div style="display: table-cell; white-space: nowrap; line-height: 1.5; max-width: 200px; text-align: center;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel"><p>saveToDb</p></span></div></foreignObject></g></g></g></g></g></svg>
|
package/flow-lifecycle.mermaid
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
sequenceDiagram
|
|
2
|
-
participant Client
|
|
3
|
-
participant pgflow as pgflow SQL Core
|
|
4
|
-
participant PGMQ as PGMQ Queue
|
|
5
|
-
participant Worker as Edge Worker
|
|
6
|
-
participant Handler as Task Handler
|
|
7
|
-
|
|
8
|
-
Client->>pgflow: create_flow(...)
|
|
9
|
-
Client->>pgflow: add_step(...)
|
|
10
|
-
Client->>pgflow: start_flow(...)
|
|
11
|
-
|
|
12
|
-
activate pgflow
|
|
13
|
-
pgflow->>pgflow: Create run record
|
|
14
|
-
pgflow->>pgflow: Initialize step_states
|
|
15
|
-
pgflow->>pgflow: Create step_tasks for root steps
|
|
16
|
-
pgflow->>PGMQ: Enqueue message for root step task
|
|
17
|
-
pgflow-->>Client: Return run details
|
|
18
|
-
deactivate pgflow
|
|
19
|
-
|
|
20
|
-
Worker->>pgflow: poll_for_tasks(...)
|
|
21
|
-
|
|
22
|
-
activate pgflow
|
|
23
|
-
pgflow->>PGMQ: Get visible messages
|
|
24
|
-
PGMQ-->>pgflow: Return messages
|
|
25
|
-
pgflow->>pgflow: Increment attempts counter on task
|
|
26
|
-
pgflow->>pgflow: Build step input by combining run input & dependency outputs
|
|
27
|
-
pgflow-->>Worker: Return tasks with metadata and inputs
|
|
28
|
-
deactivate pgflow
|
|
29
|
-
|
|
30
|
-
Worker->>Worker: Find handler function for a task
|
|
31
|
-
|
|
32
|
-
note right of Worker: Task Execution
|
|
33
|
-
|
|
34
|
-
Worker->>Handler: Call handler function with task input
|
|
35
|
-
|
|
36
|
-
rect rgb(235, 255, 235)
|
|
37
|
-
note left of Handler: Task succeeds
|
|
38
|
-
activate Handler
|
|
39
|
-
Handler-->>Worker: Return result
|
|
40
|
-
deactivate Handler
|
|
41
|
-
|
|
42
|
-
Worker->>pgflow: complete_task(results)
|
|
43
|
-
|
|
44
|
-
activate pgflow
|
|
45
|
-
pgflow->>pgflow: Update task status to 'completed'
|
|
46
|
-
pgflow->>PGMQ: Archive message
|
|
47
|
-
pgflow->>pgflow: Update step_state to 'completed'
|
|
48
|
-
pgflow->>pgflow: Check & start dependent steps
|
|
49
|
-
pgflow->>PGMQ: Enqueue messages for ready dependent steps
|
|
50
|
-
pgflow->>pgflow: Decrement remaining_steps counter
|
|
51
|
-
pgflow->>pgflow: If all steps completed, mark run as 'completed'
|
|
52
|
-
pgflow-->>Worker: Confirmation
|
|
53
|
-
deactivate pgflow
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
rect rgb(250, 240, 240)
|
|
57
|
-
note left of Handler: Handler throws or exceeds timeout
|
|
58
|
-
|
|
59
|
-
activate Handler
|
|
60
|
-
Handler--xWorker: Throw exception or exceeds timeout
|
|
61
|
-
deactivate Handler
|
|
62
|
-
Worker->>pgflow: fail_task(error_message)
|
|
63
|
-
|
|
64
|
-
activate pgflow
|
|
65
|
-
pgflow->>pgflow: Check remaining retry attempts
|
|
66
|
-
|
|
67
|
-
rect rgb(240, 250, 240)
|
|
68
|
-
note right of pgflow: Retries remaining
|
|
69
|
-
pgflow->>PGMQ: Delay message visibility
|
|
70
|
-
PGMQ->>PGMQ: Message becomes visible
|
|
71
|
-
note right of Worker: Worker attempts execution again
|
|
72
|
-
Worker->>pgflow: poll_for_tasks(...)
|
|
73
|
-
end
|
|
74
|
-
rect rgb(255, 225, 225)
|
|
75
|
-
note right of pgflow: No retries remaining
|
|
76
|
-
pgflow->>pgflow: Mark task as 'failed'
|
|
77
|
-
pgflow->>pgflow: Mark step as 'failed'
|
|
78
|
-
pgflow->>pgflow: Mark run as 'failed'
|
|
79
|
-
pgflow->>PGMQ: Archive message
|
|
80
|
-
note right of pgflow: Permanent failure of a run
|
|
81
|
-
end
|
|
82
|
-
deactivate pgflow
|
|
83
|
-
end
|