moss-connector-supabase 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moss_connector_supabase-0.0.1/PKG-INFO +189 -0
- moss_connector_supabase-0.0.1/README.md +160 -0
- moss_connector_supabase-0.0.1/moss_connector_supabase.egg-info/PKG-INFO +189 -0
- moss_connector_supabase-0.0.1/moss_connector_supabase.egg-info/SOURCES.txt +12 -0
- moss_connector_supabase-0.0.1/moss_connector_supabase.egg-info/dependency_links.txt +1 -0
- moss_connector_supabase-0.0.1/moss_connector_supabase.egg-info/requires.txt +8 -0
- moss_connector_supabase-0.0.1/moss_connector_supabase.egg-info/top_level.txt +1 -0
- moss_connector_supabase-0.0.1/pyproject.toml +56 -0
- moss_connector_supabase-0.0.1/setup.cfg +4 -0
- moss_connector_supabase-0.0.1/src/__init__.py +4 -0
- moss_connector_supabase-0.0.1/src/connector.py +40 -0
- moss_connector_supabase-0.0.1/src/ingest.py +33 -0
- moss_connector_supabase-0.0.1/tests/test_integration_supabase_moss.py +118 -0
- moss_connector_supabase-0.0.1/tests/test_supabase.py +270 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: moss-connector-supabase
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Supabase source connector for moss-connectors.
|
|
5
|
+
Author-email: "InferEdge Inc." <contact@moss.dev>
|
|
6
|
+
License: BSD-2-Clause
|
|
7
|
+
Project-URL: Homepage, https://github.com/usemoss/moss
|
|
8
|
+
Project-URL: Repository, https://github.com/usemoss/moss
|
|
9
|
+
Project-URL: Source, https://github.com/usemoss/moss/tree/main/packages/moss-data-connector/moss-connector-supabase
|
|
10
|
+
Keywords: moss,connectors,supabase,postgres,ingest,etl
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Requires-Python: <3.15,>=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: moss>=1.1.1
|
|
23
|
+
Requires-Dist: supabase>=2.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# moss-connector-supabase
|
|
31
|
+
|
|
32
|
+
Supabase source connector for Moss. Uses [supabase-py](https://github.com/supabase/supabase-py) to read rows from a Supabase table over PostgREST.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install moss-connector-supabase
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
This installs `supabase` automatically.
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import asyncio
|
|
46
|
+
from moss import DocumentInfo
|
|
47
|
+
from moss_connector_supabase import SupabaseConnector, ingest
|
|
48
|
+
|
|
49
|
+
async def main():
|
|
50
|
+
source = SupabaseConnector(
|
|
51
|
+
url="https://xxx.supabase.co",
|
|
52
|
+
key="your-anon-or-service-key",
|
|
53
|
+
table="articles",
|
|
54
|
+
mapper=lambda row: DocumentInfo(
|
|
55
|
+
id=str(row["id"]),
|
|
56
|
+
text=row["body"],
|
|
57
|
+
metadata={"title": row["title"]},
|
|
58
|
+
),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
result = await ingest(
|
|
62
|
+
source,
|
|
63
|
+
project_id="your_project_id",
|
|
64
|
+
project_key="your_project_key",
|
|
65
|
+
index_name="articles",
|
|
66
|
+
)
|
|
67
|
+
print(f"copied {result.doc_count} rows")
|
|
68
|
+
|
|
69
|
+
asyncio.run(main())
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
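Pass `auto_id=True` to `ingest()` when your mapper does not have a stable primary key; each document ID returned by the mapper is then replaced with a freshly generated random UUID before the documents are sent to Moss.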
|
|
73
|
+
|
|
74
|
+
## Data requirements
|
|
75
|
+
|
|
76
|
+
The connector doesn't impose a schema — it reads each row as a dict and passes it to your `mapper`. The constraints come from `DocumentInfo`, not the connector.
|
|
77
|
+
|
|
78
|
+
`DocumentInfo` fields:
|
|
79
|
+
|
|
80
|
+
| Field | Type | Required? | Typical Supabase column |
|
|
81
|
+
|---|---|---|---|
|
|
82
|
+
| `id` | `str` | yes | a primary key — `int`, `uuid`, slug, etc. |
|
|
83
|
+
| `text` | `str` | yes | the column you want to search (`body`, `description`, `content`...) |
|
|
84
|
+
| `metadata` | `Optional[Dict[str, str]]` | no | any other columns you want filterable / displayable |
|
|
85
|
+
| `embedding` | `Optional[Sequence[float]]` | no | only if you bring your own vectors with `model_id="custom"` |
|
|
86
|
+
|
|
87
|
+
So your table needs **at least one stringifiable column** to use as `id` and **at least one text column** to use as `text`. Everything else is optional. Examples:
|
|
88
|
+
|
|
89
|
+
**Minimal** — `id` + `body`:
|
|
90
|
+
|
|
91
|
+
```sql
|
|
92
|
+
CREATE TABLE notes (id int PRIMARY KEY, body text);
|
|
93
|
+
```
|
|
94
|
+
```python
|
|
95
|
+
mapper=lambda row: DocumentInfo(id=str(row["id"]), text=row["body"])
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Rich** — extra columns flow into metadata:
|
|
99
|
+
|
|
100
|
+
```sql
|
|
101
|
+
CREATE TABLE articles (
|
|
102
|
+
id uuid PRIMARY KEY,
|
|
103
|
+
title text,
|
|
104
|
+
body text,
|
|
105
|
+
author text,
|
|
106
|
+
published_at timestamptz,
|
|
107
|
+
view_count int,
|
|
108
|
+
tags text[]
|
|
109
|
+
);
|
|
110
|
+
```
|
|
111
|
+
```python
|
|
112
|
+
mapper=lambda row: DocumentInfo(
|
|
113
|
+
id=row["id"],
|
|
114
|
+
text=row["body"],
|
|
115
|
+
metadata={
|
|
116
|
+
"title": row["title"],
|
|
117
|
+
"author": row["author"],
|
|
118
|
+
"published_at": str(row["published_at"]), # timestamp -> str
|
|
119
|
+
"view_count": str(row["view_count"]), # int -> str
|
|
120
|
+
"tags": ",".join(row["tags"]), # array -> joined str
|
|
121
|
+
},
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### One gotcha: metadata values must be strings
|
|
126
|
+
|
|
127
|
+
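Supabase returns rows over PostgREST as decoded JSON, so Postgres types like `int`, `bool`, `numeric`, `array`, and `jsonb` come back as non-string Python values (`int`, `bool`, `int`/`float`, `list`, `dict`), `timestamp` columns come back as ISO 8601 strings rather than `datetime` objects, and nullable columns may be `None`. `DocumentInfo.metadata` requires `Dict[str, str]`, so non-string columns must be coerced in the mapper: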
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
# WILL FAIL — non-string values
|
|
131
|
+
metadata={"price": row["price"], "in_stock": row["in_stock"]}
|
|
132
|
+
|
|
133
|
+
# CORRECT
|
|
134
|
+
metadata={"price": str(row["price"]), "in_stock": str(row["in_stock"])}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The same applies to `id` if your primary key is an `int` — wrap with `str(...)`.
|
|
138
|
+
|
|
139
|
+
### What you can't do (use a view instead)
|
|
140
|
+
|
|
141
|
+
- **Joins across tables** — read-from-one-table only. Combine in a Postgres view (`CREATE VIEW articles_with_author AS SELECT a.*, u.name AS author_name FROM articles a JOIN users u ON ...`) and point the connector at the view.
|
|
142
|
+
- **Filter rows in Python** — there's no `filter=` kwarg (see [Filtering](#filtering)). Use a view to pre-filter server-side.
|
|
143
|
+
|
|
144
|
+
## Choosing a key
|
|
145
|
+
|
|
146
|
+
The `key` argument controls which rows are visible:
|
|
147
|
+
|
|
148
|
+
- **anon key** — only rows allowed by your Row-Level Security policies. Use this for ingesting publicly readable content.
|
|
149
|
+
- **service-role key** — bypasses RLS. Use this for full-table ingest in trusted backend jobs. Never ship a service-role key to a client.
|
|
150
|
+
|
|
151
|
+
The connector does not enforce this; pick the right key for your use case.
|
|
152
|
+
|
|
153
|
+
## Filtering
|
|
154
|
+
|
|
155
|
+
The connector reads every row in the named table. To restrict ingest to a subset, create a Postgres view in Supabase and point the connector at the view:
|
|
156
|
+
|
|
157
|
+
```sql
|
|
158
|
+
CREATE VIEW search_corpus AS
|
|
159
|
+
SELECT id, title, body FROM articles WHERE published = true;
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
SupabaseConnector(table="search_corpus", ...)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
A `filter=` kwarg is intentionally not exposed in v1 — Supabase's filter API is method-chained and doesn't fit a single-kwarg shape cleanly. If you need parameterized server-side filtering, open an issue.
|
|
167
|
+
|
|
168
|
+
## Pagination
|
|
169
|
+
|
|
170
|
+
PostgREST is HTTP-only with no streaming cursor, so the connector pages with `.range(start, end)`. Default `page_size=1000` matches PostgREST's default `db-max-rows` cap. **Do not raise this above your project's server-side cap** — PostgREST silently truncates the response, the connector sees a short page, and stops, missing the rest of the table.
|
|
171
|
+
|
|
172
|
+
## Layout
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
src/
|
|
176
|
+
├── __init__.py # re-exports SupabaseConnector and ingest
|
|
177
|
+
├── connector.py # SupabaseConnector class
|
|
178
|
+
└── ingest.py # ingest() - keep in sync with the other connector packages
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Tests
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
pip install -e ".[dev]"
|
|
185
|
+
pytest tests/test_supabase.py -v # mocked, no network needed
|
|
186
|
+
pytest tests/test_integration_supabase_moss.py -v -s # live Supabase + Moss
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The integration test requires `SUPABASE_URL`, `SUPABASE_KEY`, `MOSS_PROJECT_ID`, `MOSS_PROJECT_KEY`, and `SUPABASE_TEST_TABLE` (a pre-created table with `id`, `title`, `body` columns; supabase-py can't create tables over PostgREST).
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# moss-connector-supabase
|
|
2
|
+
|
|
3
|
+
Supabase source connector for Moss. Uses [supabase-py](https://github.com/supabase/supabase-py) to read rows from a Supabase table over PostgREST.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install moss-connector-supabase
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
This installs `supabase` automatically.
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import asyncio
|
|
17
|
+
from moss import DocumentInfo
|
|
18
|
+
from moss_connector_supabase import SupabaseConnector, ingest
|
|
19
|
+
|
|
20
|
+
async def main():
|
|
21
|
+
source = SupabaseConnector(
|
|
22
|
+
url="https://xxx.supabase.co",
|
|
23
|
+
key="your-anon-or-service-key",
|
|
24
|
+
table="articles",
|
|
25
|
+
mapper=lambda row: DocumentInfo(
|
|
26
|
+
id=str(row["id"]),
|
|
27
|
+
text=row["body"],
|
|
28
|
+
metadata={"title": row["title"]},
|
|
29
|
+
),
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
result = await ingest(
|
|
33
|
+
source,
|
|
34
|
+
project_id="your_project_id",
|
|
35
|
+
project_key="your_project_key",
|
|
36
|
+
index_name="articles",
|
|
37
|
+
)
|
|
38
|
+
print(f"copied {result.doc_count} rows")
|
|
39
|
+
|
|
40
|
+
asyncio.run(main())
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
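Pass `auto_id=True` to `ingest()` when your mapper does not have a stable primary key; each document ID returned by the mapper is then replaced with a freshly generated random UUID before the documents are sent to Moss.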
|
|
44
|
+
|
|
45
|
+
## Data requirements
|
|
46
|
+
|
|
47
|
+
The connector doesn't impose a schema — it reads each row as a dict and passes it to your `mapper`. The constraints come from `DocumentInfo`, not the connector.
|
|
48
|
+
|
|
49
|
+
`DocumentInfo` fields:
|
|
50
|
+
|
|
51
|
+
| Field | Type | Required? | Typical Supabase column |
|
|
52
|
+
|---|---|---|---|
|
|
53
|
+
| `id` | `str` | yes | a primary key — `int`, `uuid`, slug, etc. |
|
|
54
|
+
| `text` | `str` | yes | the column you want to search (`body`, `description`, `content`...) |
|
|
55
|
+
| `metadata` | `Optional[Dict[str, str]]` | no | any other columns you want filterable / displayable |
|
|
56
|
+
| `embedding` | `Optional[Sequence[float]]` | no | only if you bring your own vectors with `model_id="custom"` |
|
|
57
|
+
|
|
58
|
+
So your table needs **at least one stringifiable column** to use as `id` and **at least one text column** to use as `text`. Everything else is optional. Examples:
|
|
59
|
+
|
|
60
|
+
**Minimal** — `id` + `body`:
|
|
61
|
+
|
|
62
|
+
```sql
|
|
63
|
+
CREATE TABLE notes (id int PRIMARY KEY, body text);
|
|
64
|
+
```
|
|
65
|
+
```python
|
|
66
|
+
mapper=lambda row: DocumentInfo(id=str(row["id"]), text=row["body"])
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Rich** — extra columns flow into metadata:
|
|
70
|
+
|
|
71
|
+
```sql
|
|
72
|
+
CREATE TABLE articles (
|
|
73
|
+
id uuid PRIMARY KEY,
|
|
74
|
+
title text,
|
|
75
|
+
body text,
|
|
76
|
+
author text,
|
|
77
|
+
published_at timestamptz,
|
|
78
|
+
view_count int,
|
|
79
|
+
tags text[]
|
|
80
|
+
);
|
|
81
|
+
```
|
|
82
|
+
```python
|
|
83
|
+
mapper=lambda row: DocumentInfo(
|
|
84
|
+
id=row["id"],
|
|
85
|
+
text=row["body"],
|
|
86
|
+
metadata={
|
|
87
|
+
"title": row["title"],
|
|
88
|
+
"author": row["author"],
|
|
89
|
+
"published_at": str(row["published_at"]), # timestamp -> str
|
|
90
|
+
"view_count": str(row["view_count"]), # int -> str
|
|
91
|
+
"tags": ",".join(row["tags"]), # array -> joined str
|
|
92
|
+
},
|
|
93
|
+
)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### One gotcha: metadata values must be strings
|
|
97
|
+
|
|
98
|
+
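Supabase returns rows over PostgREST as decoded JSON, so Postgres types like `int`, `bool`, `numeric`, `array`, and `jsonb` come back as non-string Python values (`int`, `bool`, `int`/`float`, `list`, `dict`), `timestamp` columns come back as ISO 8601 strings rather than `datetime` objects, and nullable columns may be `None`. `DocumentInfo.metadata` requires `Dict[str, str]`, so non-string columns must be coerced in the mapper: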
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# WILL FAIL — non-string values
|
|
102
|
+
metadata={"price": row["price"], "in_stock": row["in_stock"]}
|
|
103
|
+
|
|
104
|
+
# CORRECT
|
|
105
|
+
metadata={"price": str(row["price"]), "in_stock": str(row["in_stock"])}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The same applies to `id` if your primary key is an `int` — wrap with `str(...)`.
|
|
109
|
+
|
|
110
|
+
### What you can't do (use a view instead)
|
|
111
|
+
|
|
112
|
+
- **Joins across tables** — read-from-one-table only. Combine in a Postgres view (`CREATE VIEW articles_with_author AS SELECT a.*, u.name AS author_name FROM articles a JOIN users u ON ...`) and point the connector at the view.
|
|
113
|
+
- **Filter rows in Python** — there's no `filter=` kwarg (see [Filtering](#filtering)). Use a view to pre-filter server-side.
|
|
114
|
+
|
|
115
|
+
## Choosing a key
|
|
116
|
+
|
|
117
|
+
The `key` argument controls which rows are visible:
|
|
118
|
+
|
|
119
|
+
- **anon key** — only rows allowed by your Row-Level Security policies. Use this for ingesting publicly readable content.
|
|
120
|
+
- **service-role key** — bypasses RLS. Use this for full-table ingest in trusted backend jobs. Never ship a service-role key to a client.
|
|
121
|
+
|
|
122
|
+
The connector does not enforce this; pick the right key for your use case.
|
|
123
|
+
|
|
124
|
+
## Filtering
|
|
125
|
+
|
|
126
|
+
The connector reads every row in the named table. To restrict ingest to a subset, create a Postgres view in Supabase and point the connector at the view:
|
|
127
|
+
|
|
128
|
+
```sql
|
|
129
|
+
CREATE VIEW search_corpus AS
|
|
130
|
+
SELECT id, title, body FROM articles WHERE published = true;
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
SupabaseConnector(table="search_corpus", ...)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
A `filter=` kwarg is intentionally not exposed in v1 — Supabase's filter API is method-chained and doesn't fit a single-kwarg shape cleanly. If you need parameterized server-side filtering, open an issue.
|
|
138
|
+
|
|
139
|
+
## Pagination
|
|
140
|
+
|
|
141
|
+
PostgREST is HTTP-only with no streaming cursor, so the connector pages with `.range(start, end)`. Default `page_size=1000` matches PostgREST's default `db-max-rows` cap. **Do not raise this above your project's server-side cap** — PostgREST silently truncates the response, the connector sees a short page, and stops, missing the rest of the table.
|
|
142
|
+
|
|
143
|
+
## Layout
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
src/
|
|
147
|
+
├── __init__.py # re-exports SupabaseConnector and ingest
|
|
148
|
+
├── connector.py # SupabaseConnector class
|
|
149
|
+
└── ingest.py # ingest() - keep in sync with the other connector packages
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Tests
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
pip install -e ".[dev]"
|
|
156
|
+
pytest tests/test_supabase.py -v # mocked, no network needed
|
|
157
|
+
pytest tests/test_integration_supabase_moss.py -v -s # live Supabase + Moss
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
The integration test requires `SUPABASE_URL`, `SUPABASE_KEY`, `MOSS_PROJECT_ID`, `MOSS_PROJECT_KEY`, and `SUPABASE_TEST_TABLE` (a pre-created table with `id`, `title`, `body` columns; supabase-py can't create tables over PostgREST).
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: moss-connector-supabase
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Supabase source connector for moss-connectors.
|
|
5
|
+
Author-email: "InferEdge Inc." <contact@moss.dev>
|
|
6
|
+
License: BSD-2-Clause
|
|
7
|
+
Project-URL: Homepage, https://github.com/usemoss/moss
|
|
8
|
+
Project-URL: Repository, https://github.com/usemoss/moss
|
|
9
|
+
Project-URL: Source, https://github.com/usemoss/moss/tree/main/packages/moss-data-connector/moss-connector-supabase
|
|
10
|
+
Keywords: moss,connectors,supabase,postgres,ingest,etl
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Requires-Python: <3.15,>=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: moss>=1.1.1
|
|
23
|
+
Requires-Dist: supabase>=2.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
27
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# moss-connector-supabase
|
|
31
|
+
|
|
32
|
+
Supabase source connector for Moss. Uses [supabase-py](https://github.com/supabase/supabase-py) to read rows from a Supabase table over PostgREST.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install moss-connector-supabase
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
This installs `supabase` automatically.
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import asyncio
|
|
46
|
+
from moss import DocumentInfo
|
|
47
|
+
from moss_connector_supabase import SupabaseConnector, ingest
|
|
48
|
+
|
|
49
|
+
async def main():
|
|
50
|
+
source = SupabaseConnector(
|
|
51
|
+
url="https://xxx.supabase.co",
|
|
52
|
+
key="your-anon-or-service-key",
|
|
53
|
+
table="articles",
|
|
54
|
+
mapper=lambda row: DocumentInfo(
|
|
55
|
+
id=str(row["id"]),
|
|
56
|
+
text=row["body"],
|
|
57
|
+
metadata={"title": row["title"]},
|
|
58
|
+
),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
result = await ingest(
|
|
62
|
+
source,
|
|
63
|
+
project_id="your_project_id",
|
|
64
|
+
project_key="your_project_key",
|
|
65
|
+
index_name="articles",
|
|
66
|
+
)
|
|
67
|
+
print(f"copied {result.doc_count} rows")
|
|
68
|
+
|
|
69
|
+
asyncio.run(main())
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
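Pass `auto_id=True` to `ingest()` when your mapper does not have a stable primary key; each document ID returned by the mapper is then replaced with a freshly generated random UUID before the documents are sent to Moss.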
|
|
73
|
+
|
|
74
|
+
## Data requirements
|
|
75
|
+
|
|
76
|
+
The connector doesn't impose a schema — it reads each row as a dict and passes it to your `mapper`. The constraints come from `DocumentInfo`, not the connector.
|
|
77
|
+
|
|
78
|
+
`DocumentInfo` fields:
|
|
79
|
+
|
|
80
|
+
| Field | Type | Required? | Typical Supabase column |
|
|
81
|
+
|---|---|---|---|
|
|
82
|
+
| `id` | `str` | yes | a primary key — `int`, `uuid`, slug, etc. |
|
|
83
|
+
| `text` | `str` | yes | the column you want to search (`body`, `description`, `content`...) |
|
|
84
|
+
| `metadata` | `Optional[Dict[str, str]]` | no | any other columns you want filterable / displayable |
|
|
85
|
+
| `embedding` | `Optional[Sequence[float]]` | no | only if you bring your own vectors with `model_id="custom"` |
|
|
86
|
+
|
|
87
|
+
So your table needs **at least one stringifiable column** to use as `id` and **at least one text column** to use as `text`. Everything else is optional. Examples:
|
|
88
|
+
|
|
89
|
+
**Minimal** — `id` + `body`:
|
|
90
|
+
|
|
91
|
+
```sql
|
|
92
|
+
CREATE TABLE notes (id int PRIMARY KEY, body text);
|
|
93
|
+
```
|
|
94
|
+
```python
|
|
95
|
+
mapper=lambda row: DocumentInfo(id=str(row["id"]), text=row["body"])
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Rich** — extra columns flow into metadata:
|
|
99
|
+
|
|
100
|
+
```sql
|
|
101
|
+
CREATE TABLE articles (
|
|
102
|
+
id uuid PRIMARY KEY,
|
|
103
|
+
title text,
|
|
104
|
+
body text,
|
|
105
|
+
author text,
|
|
106
|
+
published_at timestamptz,
|
|
107
|
+
view_count int,
|
|
108
|
+
tags text[]
|
|
109
|
+
);
|
|
110
|
+
```
|
|
111
|
+
```python
|
|
112
|
+
mapper=lambda row: DocumentInfo(
|
|
113
|
+
id=row["id"],
|
|
114
|
+
text=row["body"],
|
|
115
|
+
metadata={
|
|
116
|
+
"title": row["title"],
|
|
117
|
+
"author": row["author"],
|
|
118
|
+
"published_at": str(row["published_at"]), # timestamp -> str
|
|
119
|
+
"view_count": str(row["view_count"]), # int -> str
|
|
120
|
+
"tags": ",".join(row["tags"]), # array -> joined str
|
|
121
|
+
},
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### One gotcha: metadata values must be strings
|
|
126
|
+
|
|
127
|
+
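Supabase returns rows over PostgREST as decoded JSON, so Postgres types like `int`, `bool`, `numeric`, `array`, and `jsonb` come back as non-string Python values (`int`, `bool`, `int`/`float`, `list`, `dict`), `timestamp` columns come back as ISO 8601 strings rather than `datetime` objects, and nullable columns may be `None`. `DocumentInfo.metadata` requires `Dict[str, str]`, so non-string columns must be coerced in the mapper: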
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
# WILL FAIL — non-string values
|
|
131
|
+
metadata={"price": row["price"], "in_stock": row["in_stock"]}
|
|
132
|
+
|
|
133
|
+
# CORRECT
|
|
134
|
+
metadata={"price": str(row["price"]), "in_stock": str(row["in_stock"])}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The same applies to `id` if your primary key is an `int` — wrap with `str(...)`.
|
|
138
|
+
|
|
139
|
+
### What you can't do (use a view instead)
|
|
140
|
+
|
|
141
|
+
- **Joins across tables** — read-from-one-table only. Combine in a Postgres view (`CREATE VIEW articles_with_author AS SELECT a.*, u.name AS author_name FROM articles a JOIN users u ON ...`) and point the connector at the view.
|
|
142
|
+
- **Filter rows in Python** — there's no `filter=` kwarg (see [Filtering](#filtering)). Use a view to pre-filter server-side.
|
|
143
|
+
|
|
144
|
+
## Choosing a key
|
|
145
|
+
|
|
146
|
+
The `key` argument controls which rows are visible:
|
|
147
|
+
|
|
148
|
+
- **anon key** — only rows allowed by your Row-Level Security policies. Use this for ingesting publicly readable content.
|
|
149
|
+
- **service-role key** — bypasses RLS. Use this for full-table ingest in trusted backend jobs. Never ship a service-role key to a client.
|
|
150
|
+
|
|
151
|
+
The connector does not enforce this; pick the right key for your use case.
|
|
152
|
+
|
|
153
|
+
## Filtering
|
|
154
|
+
|
|
155
|
+
The connector reads every row in the named table. To restrict ingest to a subset, create a Postgres view in Supabase and point the connector at the view:
|
|
156
|
+
|
|
157
|
+
```sql
|
|
158
|
+
CREATE VIEW search_corpus AS
|
|
159
|
+
SELECT id, title, body FROM articles WHERE published = true;
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
SupabaseConnector(table="search_corpus", ...)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
A `filter=` kwarg is intentionally not exposed in v1 — Supabase's filter API is method-chained and doesn't fit a single-kwarg shape cleanly. If you need parameterized server-side filtering, open an issue.
|
|
167
|
+
|
|
168
|
+
## Pagination
|
|
169
|
+
|
|
170
|
+
PostgREST is HTTP-only with no streaming cursor, so the connector pages with `.range(start, end)`. Default `page_size=1000` matches PostgREST's default `db-max-rows` cap. **Do not raise this above your project's server-side cap** — PostgREST silently truncates the response, the connector sees a short page, and stops, missing the rest of the table.
|
|
171
|
+
|
|
172
|
+
## Layout
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
src/
|
|
176
|
+
├── __init__.py # re-exports SupabaseConnector and ingest
|
|
177
|
+
├── connector.py # SupabaseConnector class
|
|
178
|
+
└── ingest.py # ingest() - keep in sync with the other connector packages
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Tests
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
pip install -e ".[dev]"
|
|
185
|
+
pytest tests/test_supabase.py -v # mocked, no network needed
|
|
186
|
+
pytest tests/test_integration_supabase_moss.py -v -s # live Supabase + Moss
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The integration test requires `SUPABASE_URL`, `SUPABASE_KEY`, `MOSS_PROJECT_ID`, `MOSS_PROJECT_KEY`, and `SUPABASE_TEST_TABLE` (a pre-created table with `id`, `title`, `body` columns; supabase-py can't create tables over PostgREST).
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
moss_connector_supabase.egg-info/PKG-INFO
|
|
4
|
+
moss_connector_supabase.egg-info/SOURCES.txt
|
|
5
|
+
moss_connector_supabase.egg-info/dependency_links.txt
|
|
6
|
+
moss_connector_supabase.egg-info/requires.txt
|
|
7
|
+
moss_connector_supabase.egg-info/top_level.txt
|
|
8
|
+
src/__init__.py
|
|
9
|
+
src/connector.py
|
|
10
|
+
src/ingest.py
|
|
11
|
+
tests/test_integration_supabase_moss.py
|
|
12
|
+
tests/test_supabase.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
moss_connector_supabase
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "moss-connector-supabase"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Supabase source connector for moss-connectors."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10,<3.15"
|
|
7
|
+
license = { text = "BSD-2-Clause" }
|
|
8
|
+
authors = [{ name = "InferEdge Inc.", email = "contact@moss.dev" }]
|
|
9
|
+
keywords = ["moss", "connectors", "supabase", "postgres", "ingest", "etl"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: BSD License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.10",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Programming Language :: Python :: 3.13",
|
|
19
|
+
"Topic :: Database",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"moss>=1.1.1",
|
|
23
|
+
"supabase>=2.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=8.0.0",
|
|
29
|
+
"pytest-asyncio>=0.23.0",
|
|
30
|
+
"python-dotenv>=1.0.0",
|
|
31
|
+
"ruff>=0.5.0",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/usemoss/moss"
|
|
36
|
+
Repository = "https://github.com/usemoss/moss"
|
|
37
|
+
Source = "https://github.com/usemoss/moss/tree/main/packages/moss-data-connector/moss-connector-supabase"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["setuptools>=61.0"]
|
|
41
|
+
build-backend = "setuptools.build_meta"
|
|
42
|
+
|
|
43
|
+
# Flat layout: src/ itself IS the package `moss_connector_supabase`.
|
|
44
|
+
[tool.setuptools]
|
|
45
|
+
packages = ["moss_connector_supabase"]
|
|
46
|
+
package-dir = { "moss_connector_supabase" = "src" }
|
|
47
|
+
|
|
48
|
+
[tool.ruff]
|
|
49
|
+
line-length = 100
|
|
50
|
+
target-version = "py310"
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint]
|
|
53
|
+
select = ["E", "W", "F", "I", "B", "UP"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from moss import DocumentInfo
|
|
7
|
+
from supabase import create_client
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SupabaseConnector:
    """Iterable source that reads a Supabase table page by page.

    Iterating the connector requests rows with ``select(...).range(start, end)``
    and yields ``mapper(row)`` for every row dict in the response. A new client
    is created each time iteration starts.

    Args:
        url: Supabase project URL handed to ``create_client``.
        key: Supabase API key handed to ``create_client``.
        table: Name of the table (or view) to read from.
        mapper: Callable that turns one row dict into a ``DocumentInfo``.
        select: Column selection string forwarded to ``select()``.
        page_size: Number of rows requested per page; must be at least 1.
        order_by: Optional column name to sort by (ascending) on every page
            request. Offset paging without an explicit order is not guaranteed
            to be stable between requests, so passing a unique column such as
            the primary key is recommended. ``None`` (the default) keeps the
            original unordered query.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """

    def __init__(
        self,
        url: str,
        key: str,
        table: str,
        mapper: Callable[[dict[str, Any]], DocumentInfo],
        select: str = "*",
        page_size: int = 1000,
        order_by: str | None = None,
    ) -> None:
        # A page size of zero never advances `start` and never satisfies the
        # short-page stop condition, so iteration could spin forever.
        if page_size < 1:
            raise ValueError(f"page_size must be at least 1, got {page_size!r}")
        self.url = url
        self.key = key
        self.table = table
        self.mapper = mapper
        self.select = select
        self.page_size = page_size
        self.order_by = order_by

    def __iter__(self) -> Iterator[DocumentInfo]:
        """Yield one mapped document per row until a short or empty page is seen."""
        client = create_client(self.url, self.key)
        start = 0
        while True:
            # The end bound is computed as the index of the last row wanted.
            end = start + self.page_size - 1
            query = client.table(self.table).select(self.select)
            if self.order_by is not None:
                query = query.order(self.order_by)
            resp = query.range(start, end).execute()
            rows = resp.data or []
            if not rows:
                return
            for row in rows:
                yield self.mapper(row)
            # Fewer rows than requested is treated as the last page.
            if len(rows) < self.page_size:
                return
            start += self.page_size
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
|
|
6
|
+
from moss import DocumentInfo, MossClient, MutationResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _replace_doc_id(doc: DocumentInfo) -> DocumentInfo:
    """Return a copy of *doc* whose ``id`` is a newly generated UUID4 string.

    ``text`` is copied as-is; ``metadata`` and ``embedding`` are copied when
    present on *doc* and default to ``None`` otherwise.
    """
    fresh_id = str(uuid.uuid4())
    body = doc.text
    meta = getattr(doc, "metadata", None)
    vector = getattr(doc, "embedding", None)
    return DocumentInfo(id=fresh_id, text=body, metadata=meta, embedding=vector)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def ingest(
    source: Iterable[DocumentInfo],
    project_id: str,
    project_key: str,
    index_name: str,
    model_id: str | None = None,
    auto_id: bool = False,
) -> MutationResult | None:
    """Collect every document from *source* and create a Moss index from them.

    Args:
        source: Iterable of documents; it is consumed completely before any
            Moss call is made.
        project_id: First argument given to ``MossClient``.
        project_key: Second argument given to ``MossClient``.
        index_name: Name passed to ``create_index``.
        model_id: Forwarded to ``create_index`` as ``model_id``.
        auto_id: When true, each document's id is replaced via
            ``_replace_doc_id`` before indexing.

    Returns:
        The result of ``create_index``, or ``None`` when *source* yielded no
        documents (in which case no client is created).
    """
    documents = []
    for item in source:
        documents.append(_replace_doc_id(item) if auto_id else item)

    # Nothing to index: skip client construction entirely.
    if len(documents) == 0:
        return None

    moss_client = MossClient(project_id, project_key)
    outcome = await moss_client.create_index(index_name, documents, model_id=model_id)
    return outcome
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
pytest.importorskip("supabase")
|
|
10
|
+
|
|
11
|
+
from moss import DocumentInfo, MossClient, QueryOptions # noqa: E402
|
|
12
|
+
from moss_connector_supabase import SupabaseConnector, ingest # noqa: E402
|
|
13
|
+
from supabase import create_client # noqa: E402
|
|
14
|
+
|
|
15
|
+
# Optionally load credentials from .env files located above this test file.
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is not installed; rely on variables already in the environment.
    pass
else:
    _here = Path(__file__).resolve()
    _ancestors = _here.parents
    # Ancestor depths to probe for a .env file (presumably the package dir,
    # its parent, and the repository root -- TODO confirm against repo layout).
    for _depth in (1, 2, 4):
        # A shallow checkout may not have this many ancestors; indexing past
        # the end would raise IndexError during test collection.
        if _depth >= len(_ancestors):
            continue
        candidate = _ancestors[_depth] / ".env"
        if candidate.exists():
            # override=False keeps values that are already set in the environment.
            load_dotenv(candidate, override=False)
|
|
28
|
+
|
|
29
|
+
# Connection settings come from the environment; an unset variable reads as None.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
PROJECT_ID = os.environ.get("MOSS_PROJECT_ID")
PROJECT_KEY = os.environ.get("MOSS_PROJECT_KEY")
# Name of the manually created table the fixture seeds (optional).
PRESET_TABLE = os.environ.get("SUPABASE_TEST_TABLE")

# Skip the whole module unless all four service credentials are present.
pytestmark = pytest.mark.skipif(
    not all((SUPABASE_URL, SUPABASE_KEY, PROJECT_ID, PROJECT_KEY)),
    reason="Set SUPABASE_URL, SUPABASE_KEY, MOSS_PROJECT_ID, MOSS_PROJECT_KEY to run.",
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Rows written to the Supabase test table by the ``supabase_table`` fixture.
_SEED_ROWS = [
    dict(id=row_id, title=title, body=body)
    for row_id, title, body in (
        (1, "Refund policy", "Refunds take 3 to 5 business days."),
        (2, "Shipping time", "Orders ship within 24 hours."),
        (3, "Contact support", "Reach support 24/7 via live chat."),
        (4, "Password reset", "Click the link on the login page."),
        (5, "Order tracking", "Tracking number sent by email."),
    )
]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture()
def supabase_table():
    """Seed a pre-existing Supabase table for one test and clean up afterwards.

    supabase-py cannot create tables (no DDL over PostgREST), so the table
    must be created manually and named in SUPABASE_TEST_TABLE. The test is
    skipped when that variable is unset.

    The seed rows (ids 1-5) are upserted, and rows with those ids are deleted
    on teardown, so point SUPABASE_TEST_TABLE at a scratch table: existing
    rows that use those ids would presumably be overwritten by the upsert and
    are then removed by the cleanup.

    Yields:
        The name of the seeded table.
    """
    # Decide whether to skip before building a client, so a missing table
    # name never depends on the Supabase URL/key being usable.
    if not PRESET_TABLE:
        pytest.skip(
            "Set SUPABASE_TEST_TABLE to a pre-existing table with id (int), "
            "title (text) and body (text) columns; supabase-py cannot create "
            "tables over PostgREST."
        )

    client = create_client(SUPABASE_URL, SUPABASE_KEY)
    table_name = PRESET_TABLE
    inserted_ids = [r["id"] for r in _SEED_ROWS]
    try:
        client.table(table_name).upsert(_SEED_ROWS).execute()
        yield table_name
    finally:
        # Cleanup is best-effort: a failure is reported but must not mask the
        # outcome of the test itself.
        try:
            client.table(table_name).delete().in_("id", inserted_ids).execute()
        except Exception as exc:  # pragma: no cover, best-effort cleanup
            print(f"warning: failed to clean up rows in {table_name}: {exc}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def test_supabase_ingest_end_to_end(supabase_table):
    """Round trip against live services: Supabase rows → Moss index → query → delete."""
    moss = MossClient(PROJECT_ID, PROJECT_KEY)
    # A random suffix gives each run its own index name.
    index_name = f"moss-supabase-e2e-{uuid.uuid4().hex[:8]}"

    def row_to_doc(row):
        return DocumentInfo(
            id=str(row["id"]),
            text=row["body"],
            metadata={"title": row["title"]},
        )

    try:
        connector = SupabaseConnector(
            url=SUPABASE_URL,
            key=SUPABASE_KEY,
            table=supabase_table,
            mapper=row_to_doc,
        )

        ingest_result = await ingest(
            connector, PROJECT_ID, PROJECT_KEY, index_name=index_name
        )
        assert ingest_result is not None
        assert ingest_result.doc_count >= 5

        await moss.load_index(index_name)
        search = await moss.query(
            index_name, "how long do refunds take", QueryOptions(top_k=3)
        )

        assert search.docs, "expected at least one document in the search result"
        top_ids = [hit.id for hit in search.docs]
        assert "1" in top_ids, f"refund-policy doc not in top 3: {top_ids}"

        refund_doc = next(hit for hit in search.docs if hit.id == "1")
        assert refund_doc.metadata is not None
        assert refund_doc.metadata.get("title") == "Refund policy"

    finally:
        # Always try to remove the temporary index; failure is only reported.
        try:
            await moss.delete_index(index_name)
        except Exception as exc:  # pragma: no cover
            print(f"warning: failed to delete test index {index_name}: {exc}")
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
pytest.importorskip("supabase")
|
|
11
|
+
|
|
12
|
+
from moss import DocumentInfo # noqa: E402
|
|
13
|
+
from moss_connector_supabase import SupabaseConnector, ingest # noqa: E402
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class FakeMutationResult:
    """Minimal result object returned by the fake Moss client in these tests."""

    # Number of documents the fake index was created with.
    doc_count: int
    # Fixed placeholder job identifier.
    job_id: str = field(default="fake-job-id")
    # Name of the created index; empty unless supplied.
    index_name: str = field(default="")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class FakeMossClient:
    """In-memory substitute for ``MossClient`` that records ``create_index`` calls."""

    # One dict per create_index call, with keys "name", "docs" and "model_id".
    calls: list[dict[str, Any]] = field(default_factory=list)

    async def create_index(self, name, docs, model_id=None):
        """Record the call and return a fake result counting the documents."""
        received = [*docs]
        self.calls.append(dict(name=name, docs=received, model_id=model_id))
        return FakeMutationResult(index_name=name, doc_count=len(received))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _supabase_mock_paginating(pages: list[list[dict[str, Any]]]) -> MagicMock:
    """Return a mock supabase client that serves ``pages`` one per ``execute()``.

    Every ``.table(...).select(...).range(...)`` chain resolves to the same
    query mock. Its successive ``execute()`` calls return responses whose
    ``data`` is each page in order. One extra response with an empty ``data``
    list is appended, so a caller that keeps paging after the last page still
    receives an end-of-data page (e.g. when the final page is exactly
    page_size long).
    """
    client = MagicMock()
    # MagicMock creates the intermediate table/select/range mocks on access.
    query = client.table.return_value.select.return_value.range.return_value
    query.execute.side_effect = [MagicMock(data=rows) for rows in [*pages, []]]
    return client
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
async def test_supabase_ingest_end_to_end():
    """Rows from a mocked Supabase table are mapped and handed to Moss in order."""
    rows_from_supabase = [
        {"id": 1, "title": "Refund policy", "body": "Refunds take 3–5 days."},
        {"id": 2, "title": "Shipping", "body": "We ship within 24 hours."},
        {"id": 3, "title": "Returns", "body": "Returns accepted within 30 days."},
    ]
    supabase = _supabase_mock_paginating([rows_from_supabase])
    moss = FakeMossClient()

    def to_doc(row):
        return DocumentInfo(
            id=str(row["id"]),
            text=row["body"],
            metadata={"title": row["title"]},
        )

    # Replace both external clients for the duration of the ingest.
    supabase_patch = patch(
        "moss_connector_supabase.connector.create_client",
        return_value=supabase,
    )
    moss_patch = patch(
        "moss_connector_supabase.ingest.MossClient",
        return_value=moss,
    )
    with supabase_patch, moss_patch:
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="articles",
            mapper=to_doc,
        )
        result = await ingest(source, "fake_id", "fake_key", index_name="articles")

    assert result is not None
    assert result.doc_count == 3
    assert len(moss.calls) == 1

    indexed = moss.calls[0]["docs"]
    first, last = indexed[0], indexed[2]
    assert first.id == "1"
    assert first.text == "Refunds take 3–5 days."
    assert first.metadata == {"title": "Refund policy"}
    assert last.id == "3"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
async def test_auto_id_defaults_to_false():
    """Without ``auto_id``, the ids produced by the mapper reach Moss unchanged."""
    rows_from_supabase = [
        {"id": 1, "title": "T1", "body": "B1"},
        {"id": 2, "title": "T2", "body": "B2"},
        {"id": 3, "title": "T3", "body": "B3"},
    ]
    supabase = _supabase_mock_paginating([rows_from_supabase])
    moss = FakeMossClient()

    def to_doc(row):
        return DocumentInfo(
            id=str(row["id"]),
            text=row["body"],
            metadata={"title": row["title"]},
        )

    supabase_patch = patch(
        "moss_connector_supabase.connector.create_client",
        return_value=supabase,
    )
    moss_patch = patch(
        "moss_connector_supabase.ingest.MossClient",
        return_value=moss,
    )
    with supabase_patch, moss_patch:
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="articles",
            mapper=to_doc,
        )
        await ingest(source, "fake_id", "fake_key", index_name="articles")

    assert len(moss.calls) == 1
    docs = moss.calls[0]["docs"]
    for position, expected_id in enumerate(("1", "2", "3")):
        assert docs[position].id == expected_id
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
async def test_auto_id_replaces_mapper_id():
    """With ``auto_id=True``, ids become UUIDs while text and metadata are kept."""
    rows_from_supabase = [
        {"id": 1, "title": "T1", "body": "B1"},
        {"id": 2, "title": "T2", "body": "B2"},
        {"id": 3, "title": "T3", "body": "B3"},
    ]
    supabase = _supabase_mock_paginating([rows_from_supabase])
    moss = FakeMossClient()

    def to_doc(row):
        return DocumentInfo(
            id=str(row["id"]),
            text=row["body"],
            metadata={"title": row["title"]},
        )

    supabase_patch = patch(
        "moss_connector_supabase.connector.create_client",
        return_value=supabase,
    )
    moss_patch = patch(
        "moss_connector_supabase.ingest.MossClient",
        return_value=moss,
    )
    with supabase_patch, moss_patch:
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="articles",
            mapper=to_doc,
        )
        await ingest(
            source,
            "fake_id",
            "fake_key",
            index_name="articles",
            auto_id=True,
        )

    assert len(moss.calls) == 1
    docs = moss.calls[0]["docs"]
    assert len(docs) == 3

    mapper_ids = {"1", "2", "3"}
    for doc in docs:
        assert doc.id
        # uuid.UUID raises ValueError if the id is not a well-formed UUID string.
        assert uuid.UUID(doc.id)
        assert doc.id not in mapper_ids

    texts = [doc.text for doc in docs]
    assert texts == ["B1", "B2", "B3"]
    metadata = [doc.metadata for doc in docs]
    assert metadata == [
        {"title": "T1"},
        {"title": "T2"},
        {"title": "T3"},
    ]
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
async def test_pagination_advances_range_cursor():
    """With a small page_size, the connector requests successive ranges and
    stops as soon as a short page signals end of data."""
    page1 = [{"id": i, "body": f"row {i}"} for i in range(2)]
    page2 = [{"id": i, "body": f"row {i}"} for i in range(2, 3)]  # short, ends loop
    fake_client = _supabase_mock_paginating([page1, page2])
    fake_moss = FakeMossClient()

    with (
        patch(
            "moss_connector_supabase.connector.create_client",
            return_value=fake_client,
        ),
        patch(
            "moss_connector_supabase.ingest.MossClient",
            return_value=fake_moss,
        ),
    ):
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="t",
            mapper=lambda r: DocumentInfo(id=str(r["id"]), text=r["body"]),
            page_size=2,
        )
        result = await ingest(source, "fake_id", "fake_key", "t")

    # Fail with a clear assertion rather than an AttributeError on None.
    assert result is not None
    assert result.doc_count == 3
    moss_docs = fake_moss.calls[0]["docs"]
    assert [d.id for d in moss_docs] == ["0", "1", "2"]

    # range() must be called with advancing offsets (0, 1) then (2, 3), and
    # not again: the short second page has to end the loop without a third
    # request.
    range_calls = fake_client.table.return_value.select.return_value.range.call_args_list
    assert [call.args for call in range_calls] == [(0, 1), (2, 3)]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
async def test_empty_result_skips_network_call():
    """An empty table yields ``None`` and never reaches ``create_index``."""
    supabase = _supabase_mock_paginating([])
    moss = FakeMossClient()

    def to_doc(row):
        return DocumentInfo(id=str(row["id"]), text="")

    supabase_patch = patch(
        "moss_connector_supabase.connector.create_client",
        return_value=supabase,
    )
    moss_patch = patch(
        "moss_connector_supabase.ingest.MossClient",
        return_value=moss,
    )
    with supabase_patch, moss_patch:
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="empty",
            mapper=to_doc,
        )
        result = await ingest(source, "fake_id", "fake_key", "empty")

    assert result is None
    assert moss.calls == []
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
async def test_select_kwarg_forwarded():
    """The ``select`` kwarg is handed unchanged to the table's ``.select()``."""
    supabase = _supabase_mock_paginating([])

    def to_doc(row):
        return DocumentInfo(id="x", text="y")

    supabase_patch = patch(
        "moss_connector_supabase.connector.create_client",
        return_value=supabase,
    )
    with supabase_patch:
        source = SupabaseConnector(
            url="https://x.supabase.co",
            key="anon",
            table="t",
            mapper=to_doc,
            select="id,body,title",
        )
        # Drain the connector so the query is actually issued.
        for _ in source:
            pass

    select_mock = supabase.table.return_value.select
    select_mock.assert_called_once_with("id,body,title")
|