multiwoven-integrations 0.30.0 → 0.30.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/lib/multiwoven/integrations/rollout.rb +1 -1
  3. data/lib/multiwoven/integrations/source/amazon_s3/config/meta.json +1 -0
  4. data/lib/multiwoven/integrations/source/anthropic/config/meta.json +1 -0
  5. data/lib/multiwoven/integrations/source/aws_athena/config/meta.json +1 -0
  6. data/lib/multiwoven/integrations/source/aws_bedrock_model/config/meta.json +1 -0
  7. data/lib/multiwoven/integrations/source/aws_sagemaker_model/config/meta.json +1 -0
  8. data/lib/multiwoven/integrations/source/bigquery/config/meta.json +1 -0
  9. data/lib/multiwoven/integrations/source/clickhouse/config/meta.json +1 -0
  10. data/lib/multiwoven/integrations/source/databricks/config/meta.json +1 -0
  11. data/lib/multiwoven/integrations/source/databrics_model/config/meta.json +1 -0
  12. data/lib/multiwoven/integrations/source/firecrawl/client.rb +14 -50
  13. data/lib/multiwoven/integrations/source/firecrawl/config/catalog.json +26 -0
  14. data/lib/multiwoven/integrations/source/firecrawl/config/meta.json +1 -0
  15. data/lib/multiwoven/integrations/source/generic_open_ai/config/meta.json +1 -0
  16. data/lib/multiwoven/integrations/source/google_vertex_model/config/meta.json +1 -0
  17. data/lib/multiwoven/integrations/source/http_model/config/meta.json +1 -0
  18. data/lib/multiwoven/integrations/source/intuit_quick_books/config/meta.json +1 -0
  19. data/lib/multiwoven/integrations/source/maria_db/config/meta.json +1 -0
  20. data/lib/multiwoven/integrations/source/open_ai/config/meta.json +1 -0
  21. data/lib/multiwoven/integrations/source/oracle_db/config/meta.json +1 -0
  22. data/lib/multiwoven/integrations/source/pinecone_db/config/meta.json +1 -0
  23. data/lib/multiwoven/integrations/source/postgresql/config/meta.json +1 -0
  24. data/lib/multiwoven/integrations/source/qdrant/config/meta.json +1 -0
  25. data/lib/multiwoven/integrations/source/redshift/config/meta.json +1 -0
  26. data/lib/multiwoven/integrations/source/salesforce_consumer_goods_cloud/config/meta.json +1 -0
  27. data/lib/multiwoven/integrations/source/sftp/config/meta.json +1 -0
  28. data/lib/multiwoven/integrations/source/snowflake/config/meta.json +1 -0
  29. data/lib/multiwoven/integrations/source/watsonx_ai/config/meta.json +1 -0
  30. data/lib/multiwoven/integrations/source/watsonx_data/config/meta.json +1 -0
  31. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c2d7bca699532e6c4911498f3bbd9548ee96f7dc1a36bb6c24629e85b61f2f57
4
- data.tar.gz: f99005b99170a995b223d3e16a5d79625e021a1156ffca0c96ebd0ca227e1df5
3
+ metadata.gz: 51616b2e7b06336873aceb0c9e5731526dd7e57ba332eab190bc215d99c4ed8d
4
+ data.tar.gz: d376ae2826566eea31fd92dbd4e4ddf16034e1ef9085edf08f7b554744424583
5
5
  SHA512:
6
- metadata.gz: 591361ae2cb41fe8ba21f9c910e9d9cc174a9de3c679612ed574745dd34d536fc8052272df89c773854012ddfcfc85a4360eac559b518c3476415bf0ee66cdf0
7
- data.tar.gz: d44eb9694eb04719415c17a4d861c549d3c7e8e43094f2f4ee82bbb8b3e1ea751c053bff3a9dfc42753e946f4c96fb4f0ba24b134c0a274363b668e5e091783b
6
+ metadata.gz: fc9778e2da0499ed9bec602005b3b69aadd8e038e251d77acc8407bde99dbf213c1ff0d6f937a832c1d4aa660dbae578de8042caf9c0720fca3ddcff2e0d762a
7
+ data.tar.gz: a8438815522f82a1bc6db95c6bd29c94a55f182bf459f90c3e01209f54b17e303d939280a63549f6c1eff5da4e57ca1f80e85b98646f6f935995f3a371f7b274
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.30.0"
5
+ VERSION = "0.30.2"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,6 +4,7 @@
4
4
  "title": "Amazon S3",
5
5
  "connector_type": "source",
6
6
  "category": "Data Lake",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/amazon_s3",
8
9
  "github_issue_label": "source-amazons3",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Anthropic Model Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "LLM",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/anthropic-model",
8
9
  "github_issue_label": "source-anthropic-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "AWS Athena",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/aws_athena",
8
9
  "github_issue_label": "source-aws-athena",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "AWS Bedrock Model",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/aws_bedrock-model",
8
9
  "github_issue_label": "source-aws-bedrock-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "AWS Sagemaker Model",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/aws_sagemaker-model",
8
9
  "github_issue_label": "source-aws-sagemaker-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Google BigQuery",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/bquery",
8
9
  "github_issue_label": "source-bigquery",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "ClickHouse",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/clickhouse",
8
9
  "github_issue_label": "source-clickhouse",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Databricks Data Warehouse",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks",
8
9
  "github_issue_label": "source-databricks",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Databricks Model",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks-model",
8
9
  "github_issue_label": "source-databricks-foundation",
9
10
  "icon": "icon.svg",
@@ -18,12 +18,9 @@ module Multiwoven::Integrations::Source
18
18
  failure_status(e)
19
19
  end
20
20
 
21
- def discover(connection_config)
22
- connection_config = connection_config.with_indifferent_access
23
- create_connection(connection_config)
24
- response = execute_scrape(FIRECRAWL_SCRAPE_URL)
25
- results = JSON.parse(response.body)
26
- catalog = Catalog.new(streams: create_streams(results))
21
+ def discover(_connection_config = nil)
22
+ catalog_json = read_json(CATALOG_SPEC_PATH)
23
+ catalog = build_catalog(catalog_json)
27
24
  catalog.to_multiwoven_message
28
25
  rescue StandardError => e
29
26
  handle_exception(e, { context: "FIRECRAWL:DISCOVER:EXCEPTION", type: "error" })
@@ -32,9 +29,8 @@ module Multiwoven::Integrations::Source
32
29
  def read(sync_config)
33
30
  connection_config = sync_config.source.connection_specification
34
31
  connection_config = connection_config.with_indifferent_access
35
- query = sync_config.model.query
36
32
  url = create_connection(connection_config)
37
- query(url, query)
33
+ query(url, nil)
38
34
  rescue StandardError => e
39
35
  handle_exception(e, {
40
36
  context: "FIRECRAWL:READ:EXCEPTION",
@@ -62,7 +58,16 @@ module Multiwoven::Integrations::Source
62
58
  FIRECRAWL_CRAWL_URL
63
59
  end
64
60
 
65
- def query(url, _query)
61
+ def query(url, query)
62
+ has_limit = query.match(/LIMIT\s+(\d+)\s*$/i) if query.present?
63
+ if has_limit.present?
64
+ if @config["includePaths"]&.any?
65
+ path = @config["includePaths"].first
66
+ @config["url"] = URI.join(@config["url"], path).to_s
67
+ end
68
+ @config.delete("includePaths")
69
+ @config[:limit] = has_limit[1].to_i
70
+ end
66
71
  request = execute_crawl(url)
67
72
  request = JSON.parse(request.body)
68
73
  crawl_url = get_request_url(request)
@@ -89,16 +94,6 @@ module Multiwoven::Integrations::Source
89
94
  )
90
95
  end
91
96
 
92
- def execute_scrape(url)
93
- send_request(
94
- url: url,
95
- http_method: HTTP_POST,
96
- payload: JSON.parse({ "url": @base_url }.to_json),
97
- headers: auth_headers(@api_key),
98
- config: {}
99
- )
100
- end
101
-
102
97
  def crawl_activity
103
98
  send_request(
104
99
  url: FIRECRAWL_CRAWL_ACTIVE_URL,
@@ -148,37 +143,6 @@ module Multiwoven::Integrations::Source
148
143
  end
149
144
  end
150
145
 
151
- def create_streams(records)
152
- group_by_table(records).map do |r|
153
- Multiwoven::Integrations::Protocol::Stream.new(name: r[:tablename], action: StreamAction["fetch"], json_schema: convert_to_json_schema(r[:columns]))
154
- end
155
- end
156
-
157
- def group_by_table(response)
158
- columns = response["data"].map do |key, value|
159
- {
160
- column_name: key,
161
- data_type: "string",
162
- is_nullable: value.nil?
163
- }
164
- end
165
-
166
- if response["data"]["metadata"]["url"]
167
- columns << {
168
- column_name: "url",
169
- data_type: "string",
170
- is_nullable: response["data"]["metadata"]["url"].nil?
171
- }
172
- end
173
-
174
- [
175
- {
176
- tablename: "scrape",
177
- columns: columns
178
- }
179
- ]
180
- end
181
-
182
146
  def build_url(url, id)
183
147
  format(url, id: id)
184
148
  end
@@ -0,0 +1,26 @@
1
+ {
2
+ "request_rate_limit": 15,
3
+ "request_rate_limit_unit": "minute",
4
+ "request_rate_concurrency": 5,
5
+ "streams": [
6
+ {
7
+ "name": "scrape",
8
+ "action": "fetch",
9
+ "json_schema": {
10
+ "type": "object",
11
+ "properties": {
12
+ "markdown": {
13
+ "type": "string"
14
+ },
15
+ "metadata": {
16
+ "type": "string"
17
+ },
18
+ "url": {
19
+ "type": "string"
20
+ }
21
+ }
22
+ },
23
+ "supported_sync_modes": ["incremental"]
24
+ }
25
+ ]
26
+ }
@@ -4,6 +4,7 @@
4
4
  "title": "Firecrawl",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Web Scraper",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/firecrawl",
8
9
  "github_issue_label": "source-firecrawl",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Generic OpenAI Spec Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/generic_open_ai-endpoint",
8
9
  "github_issue_label": "source-generic-open-ai-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Google Vertex",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/google_vertex-model",
8
9
  "github_issue_label": "source-vertex",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "HTTP Model Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/http-model-endpoint",
8
9
  "github_issue_label": "source-http-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Intuit QuickBooks",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/intuit_quickbooks",
8
9
  "github_issue_label": "source-intuit-quickbooks",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Maria DB",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/maria_db",
8
9
  "github_issue_label": "source-maria-db",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "OpenAI Model Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "LLM",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/open_ai-model",
8
9
  "github_issue_label": "source-open-ai-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Oracle DB",
5
5
  "connector_type": "source",
6
6
  "category": "Database",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/oracle",
8
9
  "github_issue_label": "source-oracle",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Pinecone DB",
5
5
  "connector_type": "source",
6
6
  "category": "Database",
7
+ "sub_category": "Vector Database",
7
8
  "documentation_url": "https://docs.squared.ai/activation/vector_search/pinecone_db",
8
9
  "github_issue_label": "source-pinecone-db",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "PostgreSQL",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/postgresql",
8
9
  "github_issue_label": "source-postgresql",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Qdrant",
5
5
  "connector_type": "source",
6
6
  "category": "Database",
7
+ "sub_category": "Vector Database",
7
8
  "documentation_url": "https://docs.squared.ai/activation/vector_search/qdrant",
8
9
  "github_issue_label": "source-qdrant",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Amazon Redshift",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/redshift",
8
9
  "github_issue_label": "source-redshift",
9
10
  "icon": "icon.svg",
@@ -6,6 +6,7 @@
6
6
  "connector_sub_type": "api",
7
7
  "direct_query_support": true,
8
8
  "category": "Retail",
9
+ "sub_category": "Relational Database",
9
10
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/salesforce-consumer-goods-cloud",
10
11
  "github_issue_label": "source-salesforce-consumer-goods",
11
12
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "SFTP",
5
5
  "connector_type": "source",
6
6
  "category": "File Storage",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/sftp",
8
9
  "github_issue_label": "source-sftp",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "Snowflake",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/snowflake",
8
9
  "github_issue_label": "source-snowflake",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "WatsonX AI Model Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "AI Model",
7
+ "sub_category": "AI_ML Service",
7
8
  "documentation_url": "https://docs.squared.ai/activation/ai-ml-sources/watsonx_ai-model",
8
9
  "github_issue_label": "source-watsonx-ai-model",
9
10
  "icon": "icon.svg",
@@ -4,6 +4,7 @@
4
4
  "title": "WatsonX Data Endpoint",
5
5
  "connector_type": "source",
6
6
  "category": "Data Warehouse",
7
+ "sub_category": "Relational Database",
7
8
  "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/watsonx_data",
8
9
  "github_issue_label": "source-watsonx-data-endpoint",
9
10
  "icon": "icon.svg",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.30.0
4
+ version: 0.30.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-25 00:00:00.000000000 Z
11
+ date: 2025-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -752,6 +752,7 @@ files:
752
752
  - lib/multiwoven/integrations/source/databrics_model/config/spec.json
753
753
  - lib/multiwoven/integrations/source/databrics_model/icon.svg
754
754
  - lib/multiwoven/integrations/source/firecrawl/client.rb
755
+ - lib/multiwoven/integrations/source/firecrawl/config/catalog.json
755
756
  - lib/multiwoven/integrations/source/firecrawl/config/meta.json
756
757
  - lib/multiwoven/integrations/source/firecrawl/config/spec.json
757
758
  - lib/multiwoven/integrations/source/firecrawl/icon.svg