multiwoven-integrations 0.30.1 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 77b60f06178a034c3b84ac94d582f500a25f2e987c6a3b813b8220e52bf2958c
4
- data.tar.gz: abd9af3678499e495b4f6b6650db793aa9a29a1a7781ff84caad07d1826e1b88
3
+ metadata.gz: 9fd3ed70334fa8ca4cf73fa9c6e47f253e372d9b9358ee3a1004de5880e4656b
4
+ data.tar.gz: 25a4d2b8442bd87197f84cc7c36fe3bbb7864cc0f329312fba37d9742cbc8bee
5
5
  SHA512:
6
- metadata.gz: f4535cf7f29ee0dec79736d292018fa18c5d0126e7603401f07dec97a10a8b63efb1f616afcf0b230cd2858f8a7979b2e191d6b169d6c2fb41ea06c9bf64eded
7
- data.tar.gz: b4a3288ae74b92dfcf729d970db0a2cadd670aee1a5ee83f536bab6c42c07394a587b18a2e48fcf913257d0e6d6e97d6283aecf342c3b43b14ae0e53ac93dd95
6
+ metadata.gz: '08384a20d53b8b87472c509aedfaf37d68b3445a722686ba49b7fd86a4ef044eeeae7a60fa6cc1191f475b0c221ee6937a01ed635b5df784c1f5c494a9828cf5'
7
+ data.tar.gz: '085bb3fdb255ae339486d00e4747c9e97d5784e5b344dac3e5870c99229dfa71693e8d11831d9661cade3a879a42998e7c650638585be9c152e1b5a4d326702a'
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.30.1"
5
+ VERSION = "0.30.3"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -18,12 +18,9 @@ module Multiwoven::Integrations::Source
18
18
  failure_status(e)
19
19
  end
20
20
 
21
- def discover(connection_config)
22
- connection_config = connection_config.with_indifferent_access
23
- create_connection(connection_config)
24
- response = execute_scrape(FIRECRAWL_SCRAPE_URL)
25
- results = JSON.parse(response.body)
26
- catalog = Catalog.new(streams: create_streams(results))
21
+ def discover(_connection_config = nil)
22
+ catalog_json = read_json(CATALOG_SPEC_PATH)
23
+ catalog = build_catalog(catalog_json)
27
24
  catalog.to_multiwoven_message
28
25
  rescue StandardError => e
29
26
  handle_exception(e, { context: "FIRECRAWL:DISCOVER:EXCEPTION", type: "error" })
@@ -32,9 +29,8 @@ module Multiwoven::Integrations::Source
32
29
  def read(sync_config)
33
30
  connection_config = sync_config.source.connection_specification
34
31
  connection_config = connection_config.with_indifferent_access
35
- query = sync_config.model.query
36
32
  url = create_connection(connection_config)
37
- query(url, query)
33
+ query(url, nil, nil)
38
34
  rescue StandardError => e
39
35
  handle_exception(e, {
40
36
  context: "FIRECRAWL:READ:EXCEPTION",
@@ -62,7 +58,15 @@ module Multiwoven::Integrations::Source
62
58
  FIRECRAWL_CRAWL_URL
63
59
  end
64
60
 
65
- def query(url, _query)
61
+ def query(url, _query, limit = 1)
62
+ if limit.present?
63
+ if @config["includePaths"]&.any?
64
+ path = @config["includePaths"].first
65
+ @config["url"] = URI.join(@config["url"], path).to_s
66
+ end
67
+ @config.delete("includePaths")
68
+ @config[:limit] = limit
69
+ end
66
70
  request = execute_crawl(url)
67
71
  request = JSON.parse(request.body)
68
72
  crawl_url = get_request_url(request)
@@ -89,16 +93,6 @@ module Multiwoven::Integrations::Source
89
93
  )
90
94
  end
91
95
 
92
- def execute_scrape(url)
93
- send_request(
94
- url: url,
95
- http_method: HTTP_POST,
96
- payload: JSON.parse({ "url": @base_url }.to_json),
97
- headers: auth_headers(@api_key),
98
- config: {}
99
- )
100
- end
101
-
102
96
  def crawl_activity
103
97
  send_request(
104
98
  url: FIRECRAWL_CRAWL_ACTIVE_URL,
@@ -148,37 +142,6 @@ module Multiwoven::Integrations::Source
148
142
  end
149
143
  end
150
144
 
151
- def create_streams(records)
152
- group_by_table(records).map do |r|
153
- Multiwoven::Integrations::Protocol::Stream.new(name: r[:tablename], action: StreamAction["fetch"], json_schema: convert_to_json_schema(r[:columns]))
154
- end
155
- end
156
-
157
- def group_by_table(response)
158
- columns = response["data"].map do |key, value|
159
- {
160
- column_name: key,
161
- data_type: "string",
162
- is_nullable: value.nil?
163
- }
164
- end
165
-
166
- if response["data"]["metadata"]["url"]
167
- columns << {
168
- column_name: "url",
169
- data_type: "string",
170
- is_nullable: response["data"]["metadata"]["url"].nil?
171
- }
172
- end
173
-
174
- [
175
- {
176
- tablename: "scrape",
177
- columns: columns
178
- }
179
- ]
180
- end
181
-
182
145
  def build_url(url, id)
183
146
  format(url, id: id)
184
147
  end
@@ -0,0 +1,26 @@
1
+ {
2
+ "request_rate_limit": 15,
3
+ "request_rate_limit_unit": "minute",
4
+ "request_rate_concurrency": 5,
5
+ "streams": [
6
+ {
7
+ "name": "scrape",
8
+ "action": "fetch",
9
+ "json_schema": {
10
+ "type": "object",
11
+ "properties": {
12
+ "markdown": {
13
+ "type": "string"
14
+ },
15
+ "metadata": {
16
+ "type": "string"
17
+ },
18
+ "url": {
19
+ "type": "string"
20
+ }
21
+ }
22
+ },
23
+ "supported_sync_modes": ["incremental"]
24
+ }
25
+ ]
26
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.30.1
4
+ version: 0.30.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-27 00:00:00.000000000 Z
11
+ date: 2025-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -752,6 +752,7 @@ files:
752
752
  - lib/multiwoven/integrations/source/databrics_model/config/spec.json
753
753
  - lib/multiwoven/integrations/source/databrics_model/icon.svg
754
754
  - lib/multiwoven/integrations/source/firecrawl/client.rb
755
+ - lib/multiwoven/integrations/source/firecrawl/config/catalog.json
755
756
  - lib/multiwoven/integrations/source/firecrawl/config/meta.json
756
757
  - lib/multiwoven/integrations/source/firecrawl/config/spec.json
757
758
  - lib/multiwoven/integrations/source/firecrawl/icon.svg