multiwoven-integrations 0.30.0 → 0.30.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/anthropic/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/aws_athena/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/aws_bedrock_model/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/aws_sagemaker_model/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/bigquery/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/clickhouse/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/databricks/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/databrics_model/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/firecrawl/client.rb +14 -50
- data/lib/multiwoven/integrations/source/firecrawl/config/catalog.json +26 -0
- data/lib/multiwoven/integrations/source/firecrawl/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/generic_open_ai/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/google_vertex_model/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/http_model/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/intuit_quick_books/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/maria_db/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/open_ai/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/oracle_db/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/pinecone_db/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/postgresql/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/qdrant/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/redshift/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/salesforce_consumer_goods_cloud/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/sftp/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/snowflake/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/watsonx_ai/config/meta.json +1 -0
- data/lib/multiwoven/integrations/source/watsonx_data/config/meta.json +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51616b2e7b06336873aceb0c9e5731526dd7e57ba332eab190bc215d99c4ed8d
|
4
|
+
data.tar.gz: d376ae2826566eea31fd92dbd4e4ddf16034e1ef9085edf08f7b554744424583
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc9778e2da0499ed9bec602005b3b69aadd8e038e251d77acc8407bde99dbf213c1ff0d6f937a832c1d4aa660dbae578de8042caf9c0720fca3ddcff2e0d762a
|
7
|
+
data.tar.gz: a8438815522f82a1bc6db95c6bd29c94a55f182bf459f90c3e01209f54b17e303d939280a63549f6c1eff5da4e57ca1f80e85b98646f6f935995f3a371f7b274
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Amazon S3",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Lake",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/amazon_s3",
|
8
9
|
"github_issue_label": "source-amazons3",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Anthropic Model Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "LLM",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/anthropic-model",
|
8
9
|
"github_issue_label": "source-anthropic-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "AWS Athena",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/aws_athena",
|
8
9
|
"github_issue_label": "source-aws-athena",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "AWS Bedrock Model",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/aws_bedrock-model",
|
8
9
|
"github_issue_label": "source-aws-bedrock-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "AWS Sagemaker Model",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/aws_sagemaker-model",
|
8
9
|
"github_issue_label": "source-aws-sagemaker-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Google BigQuery",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/bquery",
|
8
9
|
"github_issue_label": "source-bigquery",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "ClickHouse",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/clickhouse",
|
8
9
|
"github_issue_label": "source-clickhouse",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Databricks Data Warehouse",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks",
|
8
9
|
"github_issue_label": "source-databricks",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Databricks Model",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks-model",
|
8
9
|
"github_issue_label": "source-databricks-foundation",
|
9
10
|
"icon": "icon.svg",
|
@@ -18,12 +18,9 @@ module Multiwoven::Integrations::Source
|
|
18
18
|
failure_status(e)
|
19
19
|
end
|
20
20
|
|
21
|
-
def discover(
|
22
|
-
|
23
|
-
|
24
|
-
response = execute_scrape(FIRECRAWL_SCRAPE_URL)
|
25
|
-
results = JSON.parse(response.body)
|
26
|
-
catalog = Catalog.new(streams: create_streams(results))
|
21
|
+
def discover(_connection_config = nil)
|
22
|
+
catalog_json = read_json(CATALOG_SPEC_PATH)
|
23
|
+
catalog = build_catalog(catalog_json)
|
27
24
|
catalog.to_multiwoven_message
|
28
25
|
rescue StandardError => e
|
29
26
|
handle_exception(e, { context: "FIRECRAWL:DISCOVER:EXCEPTION", type: "error" })
|
@@ -32,9 +29,8 @@ module Multiwoven::Integrations::Source
|
|
32
29
|
def read(sync_config)
|
33
30
|
connection_config = sync_config.source.connection_specification
|
34
31
|
connection_config = connection_config.with_indifferent_access
|
35
|
-
query = sync_config.model.query
|
36
32
|
url = create_connection(connection_config)
|
37
|
-
query(url,
|
33
|
+
query(url, nil)
|
38
34
|
rescue StandardError => e
|
39
35
|
handle_exception(e, {
|
40
36
|
context: "FIRECRAWL:READ:EXCEPTION",
|
@@ -62,7 +58,16 @@ module Multiwoven::Integrations::Source
|
|
62
58
|
FIRECRAWL_CRAWL_URL
|
63
59
|
end
|
64
60
|
|
65
|
-
def query(url,
|
61
|
+
def query(url, query)
|
62
|
+
has_limit = query.match(/LIMIT\s+(\d+)\s*$/i) if query.present?
|
63
|
+
if has_limit.present?
|
64
|
+
if @config["includePaths"]&.any?
|
65
|
+
path = @config["includePaths"].first
|
66
|
+
@config["url"] = URI.join(@config["url"], path).to_s
|
67
|
+
end
|
68
|
+
@config.delete("includePaths")
|
69
|
+
@config[:limit] = has_limit[1].to_i
|
70
|
+
end
|
66
71
|
request = execute_crawl(url)
|
67
72
|
request = JSON.parse(request.body)
|
68
73
|
crawl_url = get_request_url(request)
|
@@ -89,16 +94,6 @@ module Multiwoven::Integrations::Source
|
|
89
94
|
)
|
90
95
|
end
|
91
96
|
|
92
|
-
def execute_scrape(url)
|
93
|
-
send_request(
|
94
|
-
url: url,
|
95
|
-
http_method: HTTP_POST,
|
96
|
-
payload: JSON.parse({ "url": @base_url }.to_json),
|
97
|
-
headers: auth_headers(@api_key),
|
98
|
-
config: {}
|
99
|
-
)
|
100
|
-
end
|
101
|
-
|
102
97
|
def crawl_activity
|
103
98
|
send_request(
|
104
99
|
url: FIRECRAWL_CRAWL_ACTIVE_URL,
|
@@ -148,37 +143,6 @@ module Multiwoven::Integrations::Source
|
|
148
143
|
end
|
149
144
|
end
|
150
145
|
|
151
|
-
def create_streams(records)
|
152
|
-
group_by_table(records).map do |r|
|
153
|
-
Multiwoven::Integrations::Protocol::Stream.new(name: r[:tablename], action: StreamAction["fetch"], json_schema: convert_to_json_schema(r[:columns]))
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def group_by_table(response)
|
158
|
-
columns = response["data"].map do |key, value|
|
159
|
-
{
|
160
|
-
column_name: key,
|
161
|
-
data_type: "string",
|
162
|
-
is_nullable: value.nil?
|
163
|
-
}
|
164
|
-
end
|
165
|
-
|
166
|
-
if response["data"]["metadata"]["url"]
|
167
|
-
columns << {
|
168
|
-
column_name: "url",
|
169
|
-
data_type: "string",
|
170
|
-
is_nullable: response["data"]["metadata"]["url"].nil?
|
171
|
-
}
|
172
|
-
end
|
173
|
-
|
174
|
-
[
|
175
|
-
{
|
176
|
-
tablename: "scrape",
|
177
|
-
columns: columns
|
178
|
-
}
|
179
|
-
]
|
180
|
-
end
|
181
|
-
|
182
146
|
def build_url(url, id)
|
183
147
|
format(url, id: id)
|
184
148
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"request_rate_limit": 15,
|
3
|
+
"request_rate_limit_unit": "minute",
|
4
|
+
"request_rate_concurrency": 5,
|
5
|
+
"streams": [
|
6
|
+
{
|
7
|
+
"name": "scrape",
|
8
|
+
"action": "fetch",
|
9
|
+
"json_schema": {
|
10
|
+
"type": "object",
|
11
|
+
"properties": {
|
12
|
+
"markdown": {
|
13
|
+
"type": "string"
|
14
|
+
},
|
15
|
+
"metadata": {
|
16
|
+
"type": "string"
|
17
|
+
},
|
18
|
+
"url": {
|
19
|
+
"type": "string"
|
20
|
+
}
|
21
|
+
}
|
22
|
+
},
|
23
|
+
"supported_sync_modes": ["incremental"]
|
24
|
+
}
|
25
|
+
]
|
26
|
+
}
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Firecrawl",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Web Scraper",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/firecrawl",
|
8
9
|
"github_issue_label": "source-firecrawl",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Generic OpenAI Spec Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/generic_open_ai-endpoint",
|
8
9
|
"github_issue_label": "source-generic-open-ai-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Google Vertex",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/google_vertex-model",
|
8
9
|
"github_issue_label": "source-vertex",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "HTTP Model Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/http-model-endpoint",
|
8
9
|
"github_issue_label": "source-http-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Intuit QuickBooks",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/intuit_quickbooks",
|
8
9
|
"github_issue_label": "source-intuit-quickbooks",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Maria DB",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/maria_db",
|
8
9
|
"github_issue_label": "source-maria-db",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "OpenAI Model Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "LLM",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/open_ai-model",
|
8
9
|
"github_issue_label": "source-open-ai-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Pinecone DB",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Database",
|
7
|
+
"sub_category": "Vector Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/vector_search/pinecone_db",
|
8
9
|
"github_issue_label": "source-pinecone-db",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "PostgreSQL",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/postgresql",
|
8
9
|
"github_issue_label": "source-postgresql",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Amazon Redshift",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/redshift",
|
8
9
|
"github_issue_label": "source-redshift",
|
9
10
|
"icon": "icon.svg",
|
@@ -6,6 +6,7 @@
|
|
6
6
|
"connector_sub_type": "api",
|
7
7
|
"direct_query_support": true,
|
8
8
|
"category": "Retail",
|
9
|
+
"sub_category": "Relational Database",
|
9
10
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/salesforce-consumer-goods-cloud",
|
10
11
|
"github_issue_label": "source-salesforce-consumer-goods",
|
11
12
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "Snowflake",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/snowflake",
|
8
9
|
"github_issue_label": "source-snowflake",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "WatsonX AI Model Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
7
8
|
"documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/watsonx_ai-model",
|
8
9
|
"github_issue_label": "source-watsonx-ai-model",
|
9
10
|
"icon": "icon.svg",
|
@@ -4,6 +4,7 @@
|
|
4
4
|
"title": "WatsonX Data Endpoint",
|
5
5
|
"connector_type": "source",
|
6
6
|
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
7
8
|
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/watsonx_data",
|
8
9
|
"github_issue_label": "source-watsonx-data-endpoint",
|
9
10
|
"icon": "icon.svg",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.30.0
|
4
|
+
version: 0.30.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-06-
|
11
|
+
date: 2025-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -752,6 +752,7 @@ files:
|
|
752
752
|
- lib/multiwoven/integrations/source/databrics_model/config/spec.json
|
753
753
|
- lib/multiwoven/integrations/source/databrics_model/icon.svg
|
754
754
|
- lib/multiwoven/integrations/source/firecrawl/client.rb
|
755
|
+
- lib/multiwoven/integrations/source/firecrawl/config/catalog.json
|
755
756
|
- lib/multiwoven/integrations/source/firecrawl/config/meta.json
|
756
757
|
- lib/multiwoven/integrations/source/firecrawl/config/spec.json
|
757
758
|
- lib/multiwoven/integrations/source/firecrawl/icon.svg
|