multiwoven-integrations 0.29.1 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61efa047be9009c6caba7c4e7cadde86454e61e193c2a641d0f68c05dc807a7a
4
- data.tar.gz: fcab50ec033298e5d983a247a5f631649df789b82adacfd10b44ada2d0f6cef6
3
+ metadata.gz: c2d7bca699532e6c4911498f3bbd9548ee96f7dc1a36bb6c24629e85b61f2f57
4
+ data.tar.gz: f99005b99170a995b223d3e16a5d79625e021a1156ffca0c96ebd0ca227e1df5
5
5
  SHA512:
6
- metadata.gz: e91cf290679f5bd8ede4251562d8ec04c9b1a6f0a7b137587ddb48048a1a4406514c20f9f263f9fad94b281ab07f26cc5d4e44393e89617b666d3693366fc4e9
7
- data.tar.gz: 696599dd7fe27266d719ffaaa20b004b7173a8af57c55211846ac41651cf370a7d940988ab2287d2aaea3183ccd6b754a93869e1482fd4508c30bafc25197d59
6
+ metadata.gz: 591361ae2cb41fe8ba21f9c910e9d9cc174a9de3c679612ed574745dd34d536fc8052272df89c773854012ddfcfc85a4360eac559b518c3476415bf0ee66cdf0
7
+ data.tar.gz: d44eb9694eb04719415c17a4d861c549d3c7e8e43094f2f4ee82bbb8b3e1ea751c053bff3a9dfc42753e946f4c96fb4f0ba24b134c0a274363b668e5e091783b
@@ -91,6 +91,13 @@ module Multiwoven
91
91
 
92
92
  # Qdrant
93
93
  QDRANT_SEARCH_URL = "%<host>s/collections/%<collection_name>s/points/search"
94
+
95
+ # Firecrawl
96
+ FIRECRAWL_CRAWL_URL = "https://api.firecrawl.dev/v1/crawl"
97
+ FIRECRAWL_SCRAPE_URL = "https://api.firecrawl.dev/v1/scrape"
98
+ FIRECRAWL_CRAWL_ACTIVE_URL = "https://api.firecrawl.dev/v1/crawl/active"
99
+ FIRECRAWL_GET_CRAWL_URL = "https://api.firecrawl.dev/v1/crawl/%<id>s"
100
+ FIRECRAWL_REQUEST_RATE_LIMIT = 5
94
101
  end
95
102
  end
96
103
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.29.1"
5
+ VERSION = "0.30.0"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -30,6 +30,7 @@ module Multiwoven
30
30
  IntuitQuickBooks
31
31
  PineconeDB
32
32
  Qdrant
33
+ Firecrawl
33
34
  ].freeze
34
35
 
35
36
  ENABLED_DESTINATIONS = %w[
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
module Multiwoven::Integrations::Source
  module Firecrawl
    include Multiwoven::Integrations::Core

    # Source connector that reads web content through the Firecrawl HTTP API.
    #
    # * check_connection - calls the "active crawls" endpoint with the API key.
    # * discover         - scrapes the configured base URL once and derives a
    #                      single "scrape" stream from the keys of the response.
    # * read             - starts a crawl, polls until it leaves the "scraping"
    #                      state and emits one record per returned page.
    #
    # Helpers such as send_request, auth_headers, success?, success_status,
    # failure_status, handle_exception and convert_to_json_schema are not defined
    # in this file; they are expected to come from SourceConnector / Core.
    class Client < SourceConnector
      # Verifies the credentials by requesting the list of active crawls.
      #
      # @param connection_config [Hash] connection specification (api_key, base_url, config)
      # @return the connector's success status, or a failure status when the
      #   request is unsuccessful or raises
      def check_connection(connection_config)
        connection_config = connection_config.with_indifferent_access
        create_connection(connection_config)
        response = crawl_activity
        success?(response) ? success_status : failure_status(nil)
      rescue StandardError => e
        handle_exception(e, { context: "FIRECRAWL:CHECK_CONNECTION:EXCEPTION", type: "error" })
        failure_status(e)
      end

      # Builds the catalog by scraping the base URL and turning the keys of the
      # scrape response into string columns of a single "scrape" stream.
      #
      # @param connection_config [Hash] connection specification
      # @return the catalog wrapped as a multiwoven message; on error, whatever
      #   handle_exception returns
      def discover(connection_config)
        connection_config = connection_config.with_indifferent_access
        create_connection(connection_config)
        response = execute_scrape(FIRECRAWL_SCRAPE_URL)
        results = JSON.parse(response.body)
        catalog = Catalog.new(streams: create_streams(results))
        catalog.to_multiwoven_message
      rescue StandardError => e
        handle_exception(e, { context: "FIRECRAWL:DISCOVER:EXCEPTION", type: "error" })
      end

      # Runs a crawl for the configured source and returns its pages as records.
      #
      # @param sync_config the sync configuration; source.connection_specification,
      #   model.query, sync_id and sync_run_id are read from it
      # @return [Array] record messages; on error, whatever handle_exception returns
      def read(sync_config)
        connection_config = sync_config.source.connection_specification.with_indifferent_access
        # Named model_query so the local does not shadow the private #query method.
        model_query = sync_config.model.query
        crawl_url = create_connection(connection_config)
        query(crawl_url, model_query)
      rescue StandardError => e
        handle_exception(e, {
                           context: "FIRECRAWL:READ:EXCEPTION",
                           type: "error",
                           sync_id: sync_config.sync_id,
                           sync_run_id: sync_config.sync_run_id
                         })
      end

      private

      # Stores the connection settings on the instance and returns the crawl
      # endpoint. The crawl payload (@config) defaults its url to base_url when
      # the user-supplied config does not provide one.
      #
      # @param connection_config [Hash] indifferent-access connection specification
      # @return [String] FIRECRAWL_CRAWL_URL
      def create_connection(connection_config)
        @base_url = connection_config[:base_url]
        @api_key = connection_config[:api_key]
        @config = parse_config(connection_config[:config])
        @config[:url] ||= @base_url
        FIRECRAWL_CRAWL_URL
      end

      # Decodes each config value as JSON so that values like "10", "true" or
      # "[...]" reach the API as typed values; anything that is not valid JSON
      # (or is not a String at all) is passed through unchanged.
      #
      # @param raw_config [Hash, nil] user-supplied crawl options
      # @return [Hash] decoded options, or an empty hash when none were given
      def parse_config(raw_config)
        return {} unless raw_config.present?

        raw_config.transform_values do |value|
          JSON.parse(value)
        rescue JSON::ParserError, TypeError
          # TypeError covers non-String values, which JSON.parse refuses to read.
          value
        end
      end

      # Starts a crawl, waits for it to finish and maps every returned page to a
      # record with :metadata (JSON string), :markdown and :url.
      #
      # @param url [String] crawl endpoint
      # @param _query unused; kept for the connector's read signature
      # @return [Array] record messages, one per crawled page
      # @raise [RuntimeError] when the finished crawl response carries no data
      def query(url, _query)
        crawl_request = JSON.parse(execute_crawl(url).body)
        result = get_crawl_result(get_request_url(crawl_request))

        rows = result["data"]
        if rows.nil?
          status = result["status"]
          raise "Firecrawl crawl returned no data (status: #{status.inspect})."
        end

        rows.map do |row|
          metadata = row["metadata"]
          data = {
            metadata: (metadata.to_json if metadata),
            markdown: row["markdown"],
            # dig keeps rows without metadata from raising on nil.
            url: row.dig("metadata", "url")
          }
          RecordMessage.new(data: data, emitted_at: Time.now.to_i).to_multiwoven_message
        end
      end

      # POSTs the crawl options to the given endpoint.
      #
      # @param url [String] crawl endpoint
      # @return the HTTP response from send_request
      def execute_crawl(url)
        send_request(
          url: url,
          http_method: HTTP_POST,
          # JSON round-trip normalizes the payload to plain string keys.
          payload: JSON.parse(@config.to_json),
          headers: auth_headers(@api_key),
          config: {}
        )
      end

      # POSTs a scrape request for the configured base URL.
      #
      # @param url [String] scrape endpoint
      # @return the HTTP response from send_request
      def execute_scrape(url)
        send_request(
          url: url,
          http_method: HTTP_POST,
          payload: { "url" => @base_url },
          headers: auth_headers(@api_key),
          config: {}
        )
      end

      # GETs the list of currently active crawls.
      #
      # @return the HTTP response from send_request
      def crawl_activity
        send_request(
          url: FIRECRAWL_CRAWL_ACTIVE_URL,
          http_method: HTTP_GET,
          payload: {},
          headers: auth_headers(@api_key),
          config: {}
        )
      end

      # Resolves the URL from which the crawl result can be fetched.
      #
      # Order of preference:
      # 1. the "url" returned by the crawl request itself;
      # 2. if the request reported an error, wait for the advertised
      #    "retry after <n>s" interval, submit the crawl once more and use the
      #    "url" of that retry;
      # 3. otherwise the id of the last entry of the active-crawls list.
      #
      # @param request [Hash] parsed body of the crawl request response
      # @return [String] URL of the crawl status/result endpoint
      # @raise [RuntimeError] when no URL and no active crawl id can be found
      def get_request_url(request)
        return request["url"] if request["url"].present?

        if request["error"].present?
          # No "retry after" hint in the message yields 0, i.e. an immediate retry.
          wait_seconds = request["error"].to_s[/retry after (\d+)s/, 1].to_i
          sleep(wait_seconds)
          retried = JSON.parse(execute_crawl(FIRECRAWL_CRAWL_URL).body)
          # Prefer the retry's own URL so we follow the job we just started.
          return retried["url"] if retried["url"].present?
        end

        crawl_active = JSON.parse(crawl_activity.body)
        # Array(...) and &. tolerate a missing or empty "crawls" list so the
        # descriptive error below is raised instead of a NoMethodError.
        crawl_id = Array(crawl_active["crawls"]).last&.dig("id")
        raise "Missing crawl result URL and no active crawl ID available." unless crawl_id.present?

        build_url(FIRECRAWL_GET_CRAWL_URL, crawl_id.to_s)
      end

      # Polls the crawl endpoint until the job is no longer "scraping", sleeping
      # FIRECRAWL_REQUEST_RATE_LIMIT seconds between attempts.
      #
      # NOTE(review): the loop has no upper bound; a job that never leaves the
      # "scraping" state keeps this method polling indefinitely.
      #
      # @param url [String] crawl status/result endpoint
      # @return [Hash] parsed response body of the first non-"scraping" answer
      def get_crawl_result(url)
        loop do
          response = send_request(
            url: url,
            http_method: HTTP_GET,
            payload: {},
            headers: auth_headers(@api_key),
            config: {}
          )
          result = JSON.parse(response.body)
          return result if result["status"] != "scraping"

          sleep(FIRECRAWL_REQUEST_RATE_LIMIT)
        end
      end

      # Converts the parsed scrape response into protocol streams.
      #
      # @param records [Hash] parsed scrape response
      # @return [Array] one Stream per table description from group_by_table
      def create_streams(records)
        group_by_table(records).map do |table|
          Multiwoven::Integrations::Protocol::Stream.new(
            name: table[:tablename],
            action: StreamAction["fetch"],
            json_schema: convert_to_json_schema(table[:columns])
          )
        end
      end

      # Describes the single "scrape" table: one string column per key of the
      # scrape "data" object, plus a "url" column when the metadata carries one.
      #
      # @param response [Hash] parsed scrape response
      # @return [Array<Hash>] table descriptions with :tablename and :columns
      # @raise [RuntimeError] when the response has no "data" object
      def group_by_table(response)
        data = response["data"]
        raise "Firecrawl scrape returned no data." if data.nil?

        columns = data.map do |key, value|
          { column_name: key, data_type: "string", is_nullable: value.nil? }
        end

        # The url lives inside metadata; expose it as its own column. It is only
        # added when present, hence never nullable here.
        if data.dig("metadata", "url")
          columns << { column_name: "url", data_type: "string", is_nullable: false }
        end

        [{ tablename: "scrape", columns: columns }]
      end

      # Fills the %<id>s placeholder of a URL template.
      #
      # @param url [String] template containing %<id>s
      # @param id [String] crawl id
      # @return [String] the formatted URL
      def build_url(url, id)
        format(url, id: id)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ {
2
+ "data": {
3
+ "name": "Firecrawl",
4
+ "title": "Firecrawl",
5
+ "connector_type": "source",
6
+ "category": "Data Warehouse",
7
+ "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/firecrawl",
8
+ "github_issue_label": "source-firecrawl",
9
+ "icon": "icon.svg",
10
+ "license": "MIT",
11
+ "release_stage": "alpha",
12
+ "support_level": "community",
13
+ "tags": ["language:ruby", "multiwoven"]
14
+ }
15
+ }
16
+
@@ -0,0 +1,31 @@
1
+ {
2
+ "documentation_url": "https://docs.squared.ai/guides/sources/data-sources/firecrawl",
3
+ "stream_type": "dynamic",
4
+ "connector_query_type": "raw_sql",
5
+ "connection_specification": {
6
+ "$schema": "http://json-schema.org/draft-07/schema#",
7
+ "title": "Firecrawl",
8
+ "type": "object",
9
+ "required": ["api_key","base_url"],
10
+ "properties": {
11
+ "api_key": {
12
+ "type": "string",
13
+ "title": "API key",
14
+ "multiwoven_secret": true,
15
+ "order": 0
16
+ },
17
+ "base_url": {
18
+ "type": "string",
19
+ "title": "Base URL",
20
+ "order": 1
21
+ },
22
+ "config": {
23
+ "title": "Configuration",
24
+ "order": 2,
25
+ "additionalProperties": {
26
+ "type": "string"
27
+ }
28
+ }
29
+ }
30
+ }
31
+ }
@@ -0,0 +1,4 @@
1
+ <svg width="2907" height="600" viewBox="0 0 2907 600" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <text x="0" y="599.93" font-size="600" dominant-baseline="text-after-edge">🔥</text>
3
+ <path d="M768.456 483V119.364H993.953V166.594H823.321V277.389H977.796V324.442H823.321V483H768.456ZM1052.16 483V210.273H1105.25V483H1052.16ZM1078.97 168.192C1069.74 168.192 1061.81 165.114 1055.18 158.959C1048.67 152.685 1045.42 145.228 1045.42 136.587C1045.42 127.827 1048.67 120.37 1055.18 114.214C1061.81 107.941 1069.74 104.804 1078.97 104.804C1088.21 104.804 1096.08 107.941 1102.59 114.214C1109.22 120.37 1112.53 127.827 1112.53 136.587C1112.53 145.228 1109.22 152.685 1102.59 158.959C1096.08 165.114 1088.21 168.192 1078.97 168.192ZM1171.68 483V210.273H1222.99V253.597H1225.83C1230.8 238.919 1239.56 227.377 1252.11 218.973C1264.77 210.45 1279.1 206.189 1295.08 206.189C1298.39 206.189 1302.3 206.307 1306.8 206.544C1311.41 206.781 1315.02 207.077 1317.63 207.432V258.213C1315.5 257.621 1311.71 256.97 1306.26 256.26C1300.82 255.431 1295.37 255.017 1289.93 255.017C1277.38 255.017 1266.19 257.68 1256.37 263.007C1246.66 268.215 1238.97 275.495 1233.29 284.847C1227.61 294.08 1224.76 304.615 1224.76 316.452V483H1171.68ZM1466.35 488.504C1439.48 488.504 1416.33 482.763 1396.92 471.281C1377.63 459.681 1362.71 443.405 1352.18 422.453C1341.76 401.383 1336.55 376.703 1336.55 348.412C1336.55 320.476 1341.76 295.855 1352.18 274.548C1362.71 253.241 1377.39 236.61 1396.21 224.655C1415.15 212.699 1437.29 206.722 1462.62 206.722C1478.01 206.722 1492.92 209.267 1507.36 214.357C1521.8 219.446 1534.77 227.437 1546.25 238.327C1557.73 249.217 1566.78 263.362 1573.41 280.763C1580.04 298.045 1583.36 319.056 1583.36 343.795V362.616H1366.56V322.844H1531.33C1531.33 308.876 1528.49 296.506 1522.81 285.734C1517.13 274.844 1509.14 266.262 1498.84 259.989C1488.66 253.715 1476.7 250.578 1462.97 250.578C1448.06 250.578 1435.04 254.248 1423.91 261.587C1412.9 268.807 1404.38 278.277 1398.34 289.996C1392.42 301.596 1389.46 314.203 1389.46 327.815V358.888C1389.46 377.117 1392.66 392.624 1399.05 405.408C1405.56 418.192 1414.62 427.957 1426.22 434.705C1437.82 441.333 1451.37 444.648 1466.88 444.648C1476.94 
444.648 1486.11 443.227 1494.4 440.386C1502.69 437.427 1509.85 433.047 1515.89 427.247C1521.92 421.447 1526.54 414.286 1529.73 405.763L1579.98 414.818C1575.96 429.615 1568.74 442.576 1558.32 453.703C1548.02 464.712 1535.06 473.294 1519.44 479.449C1503.93 485.486 1486.23 488.504 1466.35 488.504ZM1752.14 488.504C1725.74 488.504 1703.02 482.527 1683.96 470.571C1665.02 458.497 1650.46 441.866 1640.28 420.678C1630.1 399.489 1625.01 375.223 1625.01 347.879C1625.01 320.18 1630.22 295.737 1640.63 274.548C1651.05 253.241 1665.73 236.61 1684.67 224.655C1703.61 212.699 1725.92 206.722 1751.61 206.722C1772.32 206.722 1790.79 210.569 1807.01 218.263C1823.22 225.839 1836.3 236.492 1846.25 250.223C1856.31 263.954 1862.28 279.993 1864.18 298.341H1812.51C1809.67 285.557 1803.16 274.548 1792.98 265.315C1782.92 256.082 1769.42 251.466 1752.5 251.466C1737.7 251.466 1724.74 255.372 1713.61 263.185C1702.6 270.879 1694.02 281.887 1687.86 296.21C1681.71 310.415 1678.63 327.223 1678.63 346.636C1678.63 366.523 1681.65 383.687 1687.69 398.128C1693.72 412.569 1702.25 423.755 1713.26 431.686C1724.38 439.617 1737.46 443.582 1752.5 443.582C1762.56 443.582 1771.67 441.748 1779.84 438.078C1788.13 434.29 1795.05 428.904 1800.61 421.92C1806.3 414.937 1810.26 406.532 1812.51 396.707H1864.18C1862.28 414.345 1856.54 430.088 1846.96 443.938C1837.37 457.787 1824.52 468.677 1808.43 476.608C1792.45 484.539 1773.68 488.504 1752.14 488.504ZM1915.95 483V210.273H1967.27V253.597H1970.11C1975.08 238.919 1983.84 227.377 1996.39 218.973C2009.05 210.45 2023.37 206.189 2039.35 206.189C2042.67 206.189 2046.58 206.307 2051.07 206.544C2055.69 206.781 2059.3 207.077 2061.9 207.432V258.213C2059.77 257.621 2055.99 256.97 2050.54 256.26C2045.1 255.431 2039.65 255.017 2034.21 255.017C2021.66 255.017 2010.47 257.68 2000.65 263.007C1990.94 268.215 1983.25 275.495 1977.56 284.847C1971.88 294.08 1969.04 304.615 1969.04 316.452V483H1915.95ZM2179.77 489.037C2162.49 489.037 2146.87 485.841 2132.9 479.449C2118.93 472.938 2107.86 
463.528 2099.7 451.217C2091.65 438.907 2087.62 423.814 2087.62 405.94C2087.62 390.552 2090.58 377.886 2096.5 367.943C2102.42 358 2110.41 350.128 2120.47 344.328C2130.53 338.528 2141.78 334.148 2154.21 331.189C2166.63 328.23 2179.3 325.981 2192.2 324.442C2208.54 322.548 2221.8 321.009 2231.98 319.825C2242.16 318.523 2249.55 316.452 2254.17 313.611C2258.79 310.77 2261.09 306.153 2261.09 299.761V298.518C2261.09 283.012 2256.71 270.997 2247.96 262.474C2239.31 253.952 2226.41 249.69 2209.25 249.69C2191.37 249.69 2177.29 253.656 2166.99 261.587C2156.81 269.399 2149.77 278.099 2145.86 287.688L2095.97 276.324C2101.89 259.752 2110.53 246.376 2121.89 236.196C2133.37 225.898 2146.57 218.44 2161.49 213.824C2176.4 209.089 2192.08 206.722 2208.54 206.722C2219.43 206.722 2230.97 208.024 2243.16 210.628C2255.47 213.114 2266.95 217.73 2277.61 224.477C2288.38 231.224 2297.2 240.872 2304.06 253.419C2310.93 265.848 2314.36 282.006 2314.36 301.892V483H2262.52V445.713H2260.38C2256.95 452.579 2251.8 459.326 2244.94 465.955C2238.07 472.583 2229.25 478.088 2218.48 482.467C2207.71 486.847 2194.81 489.037 2179.77 489.037ZM2191.31 446.423C2205.99 446.423 2218.54 443.523 2228.96 437.723C2239.49 431.923 2247.48 424.347 2252.93 414.996C2258.49 405.526 2261.27 395.405 2261.27 384.634V349.477C2259.38 351.371 2255.71 353.147 2250.26 354.804C2244.94 356.343 2238.84 357.704 2231.98 358.888C2225.11 359.953 2218.42 360.959 2211.91 361.906C2205.4 362.735 2199.96 363.445 2195.58 364.037C2185.28 365.339 2175.87 367.529 2167.34 370.607C2158.94 373.684 2152.19 378.123 2147.1 383.923C2142.13 389.605 2139.65 397.181 2139.65 406.651C2139.65 419.79 2144.5 429.733 2154.21 436.48C2163.91 443.109 2176.28 446.423 2191.31 446.423ZM2439.2 483L2358.94 210.273H2413.81L2467.25 410.557H2469.92L2523.54 210.273H2578.4L2631.67 409.669H2634.34L2687.42 210.273H2742.29L2662.21 483H2608.06L2552.66 286.089H2548.58L2493.18 483H2439.2ZM2840.76 119.364V483H2787.67V119.364H2840.76Z" fill="#36322F"/>
4
+ </svg>
@@ -40,6 +40,7 @@ require "MailchimpMarketing"
40
40
  require "aws-sdk-bedrockruntime"
41
41
  require "pinecone"
42
42
  require "intuit-oauth"
43
+ require "nokogiri"
43
44
 
44
45
  # Service
45
46
  require_relative "integrations/config"
@@ -88,6 +89,7 @@ require_relative "integrations/source/generic_open_ai/client"
88
89
  require_relative "integrations/source/intuit_quick_books/client"
89
90
  require_relative "integrations/source/pinecone_db/client"
90
91
  require_relative "integrations/source/qdrant/client"
92
+ require_relative "integrations/source/firecrawl/client"
91
93
 
92
94
  # Destination
93
95
  require_relative "integrations/destination/klaviyo/client"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.29.1
4
+ version: 0.30.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-17 00:00:00.000000000 Z
11
+ date: 2025-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -751,6 +751,10 @@ files:
751
751
  - lib/multiwoven/integrations/source/databrics_model/config/meta.json
752
752
  - lib/multiwoven/integrations/source/databrics_model/config/spec.json
753
753
  - lib/multiwoven/integrations/source/databrics_model/icon.svg
754
+ - lib/multiwoven/integrations/source/firecrawl/client.rb
755
+ - lib/multiwoven/integrations/source/firecrawl/config/meta.json
756
+ - lib/multiwoven/integrations/source/firecrawl/config/spec.json
757
+ - lib/multiwoven/integrations/source/firecrawl/icon.svg
754
758
  - lib/multiwoven/integrations/source/generic_open_ai/client.rb
755
759
  - lib/multiwoven/integrations/source/generic_open_ai/config/catalog.json
756
760
  - lib/multiwoven/integrations/source/generic_open_ai/config/meta.json