vector_amp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +201 -0
- data/NOTICE +8 -0
- data/README.md +377 -0
- data/lib/vector_amp/client.rb +61 -0
- data/lib/vector_amp/connections.rb +49 -0
- data/lib/vector_amp/dataset.rb +182 -0
- data/lib/vector_amp/datasets.rb +237 -0
- data/lib/vector_amp/embedding.rb +67 -0
- data/lib/vector_amp/error.rb +17 -0
- data/lib/vector_amp/ingestion.rb +416 -0
- data/lib/vector_amp/intelligence.rb +101 -0
- data/lib/vector_amp/schedules.rb +81 -0
- data/lib/vector_amp/source.rb +366 -0
- data/lib/vector_amp/transport/base.rb +11 -0
- data/lib/vector_amp/transport/http.rb +149 -0
- data/lib/vector_amp/utils.rb +54 -0
- data/lib/vector_amp/version.rb +5 -0
- data/lib/vector_amp.rb +10 -0
- metadata +150 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
require_relative "utils"
|
|
5
|
+
|
|
6
|
+
module VectorAmp
|
|
7
|
+
# Base value object for ingestion source definitions.
#
# Source objects can be passed to `client.sources.create_source(source)` to
# create a source or to `dataset.ingest_source(source)` once they include an id
# returned by the API.
class Source
  # Source types with a dedicated subclass in this SDK. The constructor does
  # not validate against this list, so other type strings are still accepted.
  SUPPORTED_SOURCE_TYPES = %w[s3 web gcs gdrive file_upload jira confluence].freeze

  # @return [String, nil] API source id when returned by the API.
  attr_reader :id
  # @return [String] source type, e.g. one of {SUPPORTED_SOURCE_TYPES}.
  attr_reader :source_type
  # @return [String] source display name.
  attr_reader :name
  # @return [String, nil] optional source description.
  attr_reader :description
  # @return [Hash] source-specific configuration (stored as given, not copied).
  attr_reader :config
  # @return [Hash, nil] optional source metadata.
  attr_reader :metadata

  # Create a source value object.
  # @param source_type [String, Symbol] source type, e.g. one of {SUPPORTED_SOURCE_TYPES}.
  # @param name [String] source display name.
  # @param config [Hash] source-specific config sent to the API.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @raise [ArgumentError] when source_type or name is nil/empty, or config is not a Hash.
  # @return [Source]
  def initialize(source_type:, name:, config:, description: nil, metadata: nil, id: nil)
    raise ArgumentError, "source_type is required" if source_type.nil? || source_type.to_s.empty?
    raise ArgumentError, "name is required" if name.nil? || name.to_s.empty?
    raise ArgumentError, "config must be a Hash" unless config.is_a?(Hash)

    @id = id
    @source_type = source_type.to_s
    @name = name.to_s
    @description = description
    @config = config
    @metadata = metadata
  end

  # Build a generic source object from an API response hash.
  # Keys may be strings or symbols; a non-Hash payload is treated as empty.
  # @param data [Hash] source payload returned by the API.
  # @raise [KeyError] when the payload has no `source_type` or `name` key.
  # @return [GenericSource]
  def self.from_api(data)
    hash = normalize_hash(data)
    GenericSource.new(
      id: hash["id"],
      source_type: hash.fetch("source_type"),
      name: hash.fetch("name"),
      description: hash["description"],
      config: hash.fetch("config", {}),
      metadata: hash["metadata"]
    )
  end

  # Read a source attribute by string or symbol key.
  # @param key [String, Symbol] attribute name.
  # @return [Object, nil] the attribute value, or nil when it is unset or unknown.
  def [](key)
    # Build the hash once; `fetch` (rather than `||`) keeps a stored `false`
    # from being mistaken for a missing attribute.
    attributes = to_h
    attributes.fetch(key.to_sym) { attributes[key.to_s] }
  end

  # Convert this source to a hash including the id when present.
  # Attributes whose value is nil are omitted.
  # @return [Hash]
  def to_h
    Utils.compact_hash(
      id: id,
      source_type: source_type,
      name: name,
      description: description,
      config: config,
      metadata: metadata
    )
  end
  alias to_hash to_h

  # Convert this source to an API create-source request body.
  # Same as {#to_h} but never includes the id.
  # @return [Hash]
  def to_create_body
    Utils.compact_hash(
      source_type: source_type,
      name: name,
      description: description,
      config: config,
      metadata: metadata
    )
  end

  # Short representation that leaves out config and metadata.
  # @return [String]
  def inspect
    "#<#{self.class} id=#{id.inspect} source_type=#{source_type.inspect} name=#{name.inspect}>"
  end

  # Return a copy of the hash with every key converted to a String.
  # @param value [Object] candidate hash.
  # @return [Hash] string-keyed copy, or an empty hash when value is not a Hash.
  def self.normalize_hash(value)
    value.is_a?(Hash) ? value.transform_keys(&:to_s) : {}
  end
end
|
|
105
|
+
|
|
106
|
+
# Default source-name helpers used when a name is omitted.
module SourceNames
  module_function

  # @param now [Time] timestamp to embed; defaults to the current UTC time.
  # @return [String] timestamped `ruby-sdk-file-upload-YYYYmmddHHMMSS` name.
  def file_upload(now: Time.now.utc)
    "ruby-sdk-file-upload-#{now.strftime("%Y%m%d%H%M%S")}"
  end

  # @param start_urls [String, Array<String>] source URLs.
  # @return [String] `web-<host>` from the first URL, or `web-source`.
  def web(start_urls)
    host = host_from_url(Array(start_urls).first)
    host ? "web-#{host}" : "web-source"
  end

  # @param bucket [String] bucket name.
  # @param prefix [String, nil] optional prefix.
  # @return [String] `s3-<bucket>` or `s3-<bucket>-<prefix>`.
  def s3(bucket, prefix = nil)
    bucket_name("s3", bucket, prefix)
  end

  # @param bucket [String] bucket name.
  # @param prefix [String, nil] optional prefix.
  # @return [String] `gcs-<bucket>` or `gcs-<bucket>-<prefix>`.
  def gcs(bucket, prefix = nil)
    bucket_name("gcs", bucket, prefix)
  end

  # @param project_keys [String, Array<String>, nil] Jira project keys.
  # @param cloud_id [String, nil] Atlassian cloud/site id.
  # @return [String] `jira-<first project key>`, `jira-<cloud_id>`, or `jira-source`.
  def jira(project_keys: nil, cloud_id: nil)
    key = first_present(project_keys, cloud_id)
    key ? "jira-#{key}" : "jira-source"
  end

  # @param spaces [String, Array<String>, nil] Confluence space keys.
  # @param cloud_id [String, nil] Atlassian cloud/site id.
  # @param base_url [String, nil] Confluence base URL, e.g. https://company.atlassian.net.
  # @return [String] `confluence-<space>`, `confluence-<host>`, `confluence-<cloud_id>`,
  #   or `confluence-source`, in that order of preference.
  def confluence(spaces: nil, cloud_id: nil, base_url: nil)
    label = first_present(spaces) || host_from_url(base_url) || first_present(cloud_id)
    label ? "confluence-#{label}" : "confluence-source"
  end

  # Extract the host component of a URL.
  # @param url [String, nil] URL to inspect.
  # @return [String, nil] the host, or nil when the URL is blank, unparseable, or has no host.
  def host_from_url(url)
    return nil if url.nil? || url.to_s.empty?

    host = URI.parse(url.to_s).host
    host && !host.empty? ? host : nil
  rescue URI::InvalidURIError
    nil
  end

  # @param folder_ids [String, Array<String>, nil] folder ids.
  # @param file_ids [String, Array<String>, nil] file ids.
  # @return [String] `google-drive-<first id>` or `google-drive-source`.
  def google_drive(folder_ids: nil, file_ids: nil)
    suffix = first_present(folder_ids, file_ids)
    suffix ? "google-drive-#{suffix}" : "google-drive-source"
  end

  # Join scheme, bucket and prefix with dashes, dropping empty parts and one
  # trailing slash from the prefix.
  def bucket_name(scheme, bucket, prefix)
    [scheme, bucket.to_s, prefix.to_s.delete_suffix("/")].reject(&:empty?).join("-")
  end

  # First element of the first candidate whose first element is neither nil nor
  # an empty string. Treating "" as absent avoids names like `jira-`.
  def first_present(*candidates)
    candidates.each do |candidate|
      value = Array(candidate).first
      return value unless value.nil? || value.to_s.empty?
    end
    nil
  end

  # Internal helpers; keep them off the module's public surface.
  private_class_method :bucket_name, :first_present
end
|
|
177
|
+
|
|
178
|
+
# Web-crawl ingestion source.
class WebSource < Source
  # @param start_urls [String, Array<String>] required seed URLs.
  # @param name [String, nil] defaults to `web-<host>` from the first URL, or `web-source`.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] additional web-source config forwarded to the API.
  # @raise [ArgumentError] when no URL is given or any given URL is nil/empty.
  # @return [WebSource]
  def initialize(start_urls:, name: nil, description: nil, metadata: nil, id: nil, **config)
    urls = Array(start_urls)
    # A blank entry ("" or nil) is as unusable as no entry at all, so reject it
    # here the same way other sources in this file reject a blank required value.
    if urls.empty? || urls.any? { |url| url.to_s.empty? }
      raise ArgumentError, "start_urls must not be empty"
    end

    super(
      id: id,
      source_type: "web",
      name: name || SourceNames.web(urls),
      description: description,
      metadata: metadata,
      config: Utils.compact_hash(config.merge(start_urls: urls))
    )
  end
end
|
|
201
|
+
|
|
202
|
+
# Ingestion source backed by an S3 bucket.
class S3Source < Source
  # Build an S3 source definition.
  #
  # @param bucket [String] S3 bucket name (required, must not be blank).
  # @param name [String, nil] display name; when omitted it is derived as
  #   `s3-<bucket>` or `s3-<bucket>-<prefix>`.
  # @param prefix [String, nil] object prefix; left out of the config when nil.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] any further keywords are forwarded as S3-source config.
  # @raise [ArgumentError] when bucket is nil or empty.
  # @return [S3Source]
  def initialize(bucket:, name: nil, prefix: nil, description: nil, metadata: nil, id: nil, **config)
    # nil.to_s is "", so this single check covers both nil and empty.
    raise ArgumentError, "bucket is required" if bucket.to_s.empty?

    display_name = name || SourceNames.s3(bucket, prefix)
    settings = Utils.compact_hash(config.merge(bucket: bucket, prefix: prefix))

    super(
      id: id,
      source_type: "s3",
      name: display_name,
      description: description,
      metadata: metadata,
      config: settings
    )
  end
end
|
|
225
|
+
|
|
226
|
+
# Ingestion source backed by a Google Cloud Storage bucket.
class GCSSource < Source
  # Build a GCS source definition.
  #
  # @param bucket [String] GCS bucket name (required, must not be blank).
  # @param name [String, nil] display name; when omitted it is derived as
  #   `gcs-<bucket>` or `gcs-<bucket>-<prefix>`.
  # @param prefix [String, nil] object prefix; left out of the config when nil.
  # @param connection_id [String, nil] connection id; left out of the config when nil.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] any further keywords are forwarded as GCS-source config.
  # @raise [ArgumentError] when bucket is nil or empty.
  # @return [GCSSource]
  def initialize(bucket:, name: nil, prefix: nil, connection_id: nil, description: nil, metadata: nil, id: nil, **config)
    # nil.to_s is "", so this single check covers both nil and empty.
    raise ArgumentError, "bucket is required" if bucket.to_s.empty?

    display_name = name || SourceNames.gcs(bucket, prefix)
    settings = Utils.compact_hash(
      config.merge(bucket: bucket, prefix: prefix, connection_id: connection_id)
    )

    super(
      id: id,
      source_type: "gcs",
      name: display_name,
      description: description,
      metadata: metadata,
      config: settings
    )
  end
end
|
|
241
|
+
|
|
242
|
+
# Google Drive ingestion source.
class GoogleDriveSource < Source
  # @param name [String, nil] defaults to `google-drive-<first id>` or `google-drive-source`.
  # @param folder_ids [String, Array<String>, nil] folder ids to ingest; required if file_ids is empty.
  # @param file_ids [String, Array<String>, nil] file ids to ingest; required if folder_ids is empty.
  # @param auth_mode [String, nil] auth strategy (`service_account`, `oauth`); omitted from config when nil.
  # @param service_account_json [Hash, String, nil] service-account credentials for `service_account` auth.
  # @param oauth_credentials [Hash, nil] OAuth credentials for `oauth` auth.
  # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] additional Google Drive-source config forwarded to the API.
  # @raise [ArgumentError] when both folder_ids and file_ids are nil or empty.
  # @return [GoogleDriveSource]
  def initialize(name: nil, folder_ids: nil, file_ids: nil, auth_mode: nil, service_account_json: nil,
                 oauth_credentials: nil, connection_id: nil, description: nil, metadata: nil, id: nil, **config)
    # A single id may be passed as a bare String. Wrap it so the config always
    # carries a list, the same way ConfluenceSource normalizes `spaces`; nil
    # stays nil so the key is dropped from the config.
    # NOTE(review): assumes the API expects arrays for these keys - confirm.
    folders = folder_ids.nil? ? nil : Array(folder_ids)
    files = file_ids.nil? ? nil : Array(file_ids)

    if Array(folders).empty? && Array(files).empty?
      raise ArgumentError, "folder_ids or file_ids is required"
    end

    super(
      id: id,
      source_type: "gdrive",
      name: name || SourceNames.google_drive(folder_ids: folders, file_ids: files),
      description: description,
      metadata: metadata,
      config: Utils.compact_hash(config.merge(
        folder_ids: folders,
        file_ids: files,
        auth_mode: auth_mode,
        service_account_json: service_account_json,
        oauth_credentials: oauth_credentials,
        connection_id: connection_id
      ))
    )
  end
end
|
|
279
|
+
|
|
280
|
+
# Source type used for direct uploads of local files.
class FileUploadSource < Source
  # Build a file-upload source definition.
  #
  # @param name [String, nil] display name; when omitted a timestamped
  #   `ruby-sdk-file-upload-YYYYmmddHHMMSS` name is generated.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param storage_provider [String] storage backend; defaults to `s3`.
  # @param sync_mode [String] sync strategy; defaults to `full`.
  # @param config [Hash] any further keywords are forwarded as file-upload config.
  # @return [FileUploadSource]
  def initialize(name: nil, description: nil, metadata: nil, id: nil, storage_provider: "s3", sync_mode: "full", **config)
    display_name = name || SourceNames.file_upload
    settings = Utils.compact_hash(
      config.merge(storage_provider: storage_provider, sync_mode: sync_mode)
    )

    super(
      id: id,
      source_type: "file_upload",
      name: display_name,
      description: description,
      metadata: metadata,
      config: settings
    )
  end
end
|
|
301
|
+
|
|
302
|
+
# Jira ingestion source. include_comments defaults to true.
class JiraSource < Source
  # @param cloud_id [String] Atlassian cloud/site id (required, must not be blank).
  # @param name [String, nil] defaults to `jira-<first project key>`, else `jira-<cloud_id>`.
  # @param access_token [String, nil] access token; omitted from config when nil.
  # @param project_keys [String, Array<String>, nil] project keys; omitted from config when nil.
  # @param jql [String, nil] JQL expression; omitted from config when nil.
  # @param include_comments [Boolean] defaults to true.
  # @param connection_id [String, nil] connection id; omitted from config when nil.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] additional Jira-source config forwarded to the API.
  # @raise [ArgumentError] when cloud_id is nil or empty.
  # @return [JiraSource]
  def initialize(cloud_id:, name: nil, access_token: nil, project_keys: nil, jql: nil, include_comments: true,
                 connection_id: nil, description: nil, metadata: nil, id: nil, **config)
    raise ArgumentError, "cloud_id is required" if cloud_id.nil? || cloud_id.to_s.empty?

    # A single key may be passed as a bare String. Wrap it so the config always
    # carries a list, the same way ConfluenceSource normalizes `spaces`; nil
    # stays nil so the key is dropped from the config.
    # NOTE(review): assumes the API expects an array for project_keys - confirm.
    keys = project_keys.nil? ? nil : Array(project_keys)

    super(
      id: id,
      source_type: "jira",
      name: name || SourceNames.jira(project_keys: keys, cloud_id: cloud_id),
      description: description,
      metadata: metadata,
      config: Utils.compact_hash(config.merge(
        cloud_id: cloud_id,
        access_token: access_token,
        project_keys: keys,
        jql: jql,
        include_comments: include_comments,
        connection_id: connection_id
      ))
    )
  end
end
|
|
317
|
+
|
|
318
|
+
# Ingestion source for Confluence. The auth mode defaults to `basic`
# (username + API token); `oauth` is the alternative. Attachments are
# excluded unless include_attachments is set.
class ConfluenceSource < Source
  # Build a Confluence source definition.
  #
  # @param cloud_id [String, nil] Atlassian OAuth cloud/site id; required unless base_url is given.
  # @param base_url [String, nil] Confluence base URL, e.g. https://company.atlassian.net.
  # @param name [String, nil] display name; when omitted it is derived from the
  #   first space, the base URL host, or the cloud id, else `confluence-source`.
  # @param auth_mode [String] `basic` (default) or `oauth`.
  # @param username [String, nil] username for basic auth.
  # @param api_token [String, nil] API token for basic auth.
  # @param oauth_credentials [Hash, nil] OAuth credentials for oauth auth_mode.
  # @param spaces [String, Array<String>, nil] space keys to ingest; empty means all accessible.
  # @param include_attachments [Boolean] include page attachments; defaults to false.
  # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
  # @param description [String, nil] optional description.
  # @param metadata [Hash, nil] optional metadata.
  # @param id [String, nil] optional API source id.
  # @param config [Hash] any further keywords are forwarded as Confluence-source config.
  # @raise [ArgumentError] when both cloud_id and base_url are nil or empty.
  # @return [ConfluenceSource]
  def initialize(cloud_id: nil, base_url: nil, name: nil, auth_mode: "basic", username: nil, api_token: nil,
                 oauth_credentials: nil, spaces: nil, include_attachments: false, connection_id: nil,
                 description: nil, metadata: nil, id: nil, **config)
    # nil.to_s is "", so each check covers both nil and empty.
    cloud_missing = cloud_id.to_s.empty?
    url_missing = base_url.to_s.empty?
    raise ArgumentError, "cloud_id or base_url is required" if cloud_missing && url_missing

    # A scalar space key becomes a one-element list; nil stays nil so the key
    # is dropped from the config.
    space_list = Array(spaces) unless spaces.nil?

    display_name = name || SourceNames.confluence(spaces: spaces, cloud_id: cloud_id, base_url: base_url)
    settings = config.merge(
      cloud_id: cloud_id,
      base_url: base_url,
      auth_mode: auth_mode,
      username: username,
      api_token: api_token,
      oauth_credentials: oauth_credentials,
      spaces: space_list,
      include_attachments: include_attachments,
      connection_id: connection_id
    )

    super(
      id: id,
      source_type: "confluence",
      name: display_name,
      description: description,
      metadata: metadata,
      config: Utils.compact_hash(settings)
    )
  end
end
|
|
362
|
+
|
|
363
|
+
# Source with no type-specific constructor: it takes an arbitrary source_type
# and config directly. Intended as an escape hatch for API-compatible source
# types/configs that the SDK does not model yet.
class GenericSource < Source
end
|
|
366
|
+
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module VectorAmp
|
|
4
|
+
module Transport
|
|
5
|
+
# Abstract transport. Subclasses override #request; this implementation only
# raises.
class Base
  # Perform a request. Must be overridden.
  #
  # The keyword list mirrors the HTTP transport in this package, including
  # `raw:`, so code written against a Base subclass can be called with the
  # same options as the HTTP transport without an ArgumentError.
  #
  # @param method [String, Symbol] request method.
  # @param path [String] request path.
  # @param query [Hash, nil] optional query parameters.
  # @param body [Object, nil] optional request body.
  # @param headers [Hash] extra request headers.
  # @param stream [Boolean] whether the response is consumed as a stream via the block.
  # @param raw [Boolean] whether the response body is returned without parsing.
  # @raise [NotImplementedError] always. Note that NotImplementedError is a
  #   ScriptError, so a bare `rescue` does not catch it.
  def request(method, path, query: nil, body: nil, headers: {}, stream: false, raw: false, &block)
    raise NotImplementedError, "transport must implement #request"
  end
end
|
|
10
|
+
end
|
|
11
|
+
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
require_relative "base"
|
|
8
|
+
require_relative "../error"
|
|
9
|
+
|
|
10
|
+
module VectorAmp
|
|
11
|
+
module Transport
|
|
12
|
+
# Net::HTTP-based transport. Sends JSON request bodies, authenticates with an
# `X-API-Key` header, and either returns the response body (parsed or raw) or
# yields server-sent events to a block.
class HTTP < Base
  DEFAULT_TIMEOUT = 60
  # Upper bound on consecutive redirects followed for one request, so a
  # redirect loop ends in an error instead of unbounded recursion.
  MAX_REDIRECTS = 5

  # @param base_url [String] API base URL; request paths are appended to its path.
  # @param api_key [String] value sent in the `X-API-Key` header.
  # @param timeout [Numeric] open and read timeout in seconds.
  def initialize(base_url:, api_key:, timeout: DEFAULT_TIMEOUT)
    @base_uri = URI(base_url)
    @api_key = api_key
    @timeout = timeout
  end

  # Perform a request against the API.
  #
  # @param method [String, Symbol] HTTP verb, e.g. `:get` or `"post"`.
  # @param path [String] path appended to the base URL's path.
  # @param query [Hash, nil] query parameters; nil values are dropped.
  # @param body [Object, nil] request body, serialized with JSON.generate when present.
  # @param headers [Hash] extra headers; these override the defaults set by this transport.
  # @param stream [Boolean] when true, the response is parsed as server-sent
  #   events and each event is yielded to the block.
  # @param raw [Boolean] when true, a successful response body is returned unparsed.
  # @yield [event] each server-sent event (only when stream is true).
  # @raise [APIError] on a non-success response, or when too many redirects are followed.
  # @return [Object, nil] nil when streaming; otherwise the raw body, the
  #   JSON-decoded body, the body string if it is not valid JSON, or nil if empty.
  def request(method, path, query: nil, body: nil, headers: {}, stream: false, raw: false, &block)
    uri = build_uri(path, query)
    request = build_request(method, uri, body, headers)

    open_connection(uri) do |http|
      if stream
        stream_response(http, request, &block)
      else
        handle_response(http.request(request), raw: raw)
      end
    end
  end

  private

  # Open a connection to the URI's host, using TLS for https URLs, and yield it.
  # Returns the block's value.
  def open_connection(uri, &block)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                                        open_timeout: @timeout, read_timeout: @timeout, &block)
  end

  # Join the base URL's path with the request path and attach the query string.
  def build_uri(path, query)
    uri = @base_uri.dup
    base_path = uri.path.to_s.chomp("/")
    relative = path.to_s.start_with?("/") ? path : "/#{path}"
    uri.path = "#{base_path}#{relative}"
    params = query&.compact
    uri.query = URI.encode_www_form(params) if params && !params.empty?
    uri
  end

  # Build the Net::HTTP request object with default and caller headers.
  # `authenticate: false` leaves out the API key header; it is used for
  # redirects that point at a different origin.
  def build_request(method, uri, body, headers, authenticate: true)
    # "get" / :GET -> Net::HTTP::Get, and so on.
    klass = Net::HTTP.const_get(method.to_s.capitalize)
    request = klass.new(uri)
    request["Accept"] = "text/event-stream, application/json"
    request["Content-Type"] = "application/json" if body
    request["User-Agent"] = "vector_amp-ruby/#{VectorAmp::VERSION}"
    request["X-API-Key"] = @api_key if authenticate
    headers.each { |key, value| request[key.to_s] = value }
    request.body = JSON.generate(body) if body
    request
  end

  # Turn a completed response into a return value or an APIError.
  # `redirects_left` is the remaining redirect budget for this request.
  def handle_response(response, raw: false, redirects_left: MAX_REDIRECTS)
    if redirect?(response)
      return follow_redirect(response, raw: raw, redirects_left: redirects_left)
    end
    return response.body if raw && response.is_a?(Net::HTTPSuccess)

    parsed = parse_body(response.body)
    return parsed if response.is_a?(Net::HTTPSuccess)

    raise api_error(response, parsed)
  end

  # A 3xx response only counts as a redirect when it names a Location.
  def redirect?(response)
    response.is_a?(Net::HTTPRedirection) && response["location"]
  end

  # Re-issue the request as a body-less GET to the Location target.
  def follow_redirect(response, raw:, redirects_left: MAX_REDIRECTS)
    if redirects_left <= 0
      raise APIError.new("too many redirects", status: response.code.to_i, body: nil, headers: response.to_hash)
    end

    location = response["location"]
    uri = URI(location)
    uri = @base_uri + location unless uri.absolute?
    # Only send the API key back to the origin it was issued for; a redirect
    # to another host (for example a storage download URL) must not receive it.
    request = build_request(:get, uri, nil, {}, authenticate: same_origin?(uri))
    open_connection(uri) do |http|
      handle_response(http.request(request), raw: raw, redirects_left: redirects_left - 1)
    end
  end

  # True when the URI has the same scheme, host and port as the base URL.
  def same_origin?(uri)
    uri.scheme == @base_uri.scheme &&
      uri.port == @base_uri.port &&
      uri.host.to_s.casecmp?(@base_uri.host.to_s)
  end

  # Stream the response body through an SSEParser, yielding each event.
  # Redirects are not followed here; any non-success response raises. Returns nil.
  def stream_response(http, request)
    http.request(request) do |response|
      unless response.is_a?(Net::HTTPSuccess)
        raise api_error(response, parse_body(response.body || ""))
      end

      parser = SSEParser.new
      response.read_body do |chunk|
        parser.feed(chunk) do |event|
          yield event if block_given?
        end
      end
      # Emit a final event that was not terminated by a blank line.
      parser.flush { |event| yield event if block_given? }
    end
    nil
  end

  # Build the APIError for a failed response. The message is taken from the
  # body's "error" or "message" key when the body is a JSON object, otherwise
  # from the HTTP reason phrase.
  def api_error(response, parsed)
    message = parsed.is_a?(Hash) ? (parsed["error"] || parsed["message"] || response.message) : response.message
    APIError.new(message, status: response.code.to_i, body: parsed, headers: response.to_hash)
  end

  # Decode a JSON body; return nil for an empty body and the original string
  # when it is not valid JSON.
  def parse_body(body)
    return nil if body.nil? || body.empty?

    JSON.parse(body)
  rescue JSON::ParserError
    body
  end
end
|
|
111
|
+
|
|
112
|
+
# Incremental parser for a server-sent-events stream. Feed it chunks as they
# arrive; it yields one event per blank-line-terminated frame.
#
# Only `data:` lines are used. Multiple `data:` lines in a frame are joined
# with "\n". A frame whose data is empty or the literal `[DONE]` yields
# nothing. Data that parses as JSON is yielded decoded; anything else is
# yielded as the raw string.
class SSEParser
  def initialize
    @buffer = +""
  end

  # Append a chunk and yield every complete frame now in the buffer.
  # @param chunk [String] next piece of the response body.
  # @yield [event] decoded JSON value or raw data string.
  def feed(chunk)
    @buffer << chunk
    # Frames end at a blank line; accept both LF and CRLF line endings.
    while (index = @buffer.index(/\r?\n\r?\n/))
      frame = @buffer.slice!(0...index)
      @buffer.sub!(/\A\r?\n\r?\n/, "")
      emit(frame) { |event| yield event }
    end
  end

  # Yield whatever is left in the buffer as a final frame, then clear it.
  # @yield [event] decoded JSON value or raw data string.
  def flush
    emit(@buffer) { |event| yield event } unless @buffer.empty?
    @buffer.clear
  end

  private

  # Extract the data payload from one frame and yield it, if there is any.
  def emit(frame)
    data = frame.each_line.filter_map do |line|
      stripped = line.strip
      # Skips blank lines, `:` comments and all non-data fields.
      next unless stripped.start_with?("data:")

      stripped.delete_prefix("data:").strip
    end.join("\n")
    return if data.empty? || data == "[DONE]"

    # Decode before yielding so the rescue in #decode cannot catch a
    # JSON::ParserError raised by the consumer's block; otherwise the same
    # frame would be yielded a second time as a raw string.
    yield decode(data)
  end

  # JSON-decode the payload, falling back to the raw string.
  def decode(data)
    JSON.parse(data)
  rescue JSON::ParserError
    data
  end
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module VectorAmp
|
|
4
|
+
# Small stateless helpers shared across the SDK.
module Utils
  module_function

  # Copy a hash without its nil-valued entries. `false` values are kept.
  # @param hash [Hash] input hash.
  # @return [Hash] new hash containing only the non-nil entries.
  def compact_hash(hash)
    hash.each_with_object({}) do |(key, value), result|
      result[key] = value unless value.nil?
    end
  end

  # Raise when a hash of leftover options is not empty.
  # @param unknown [Hash] options that were not recognized.
  # @param method_name [String, Symbol] name used in the error message.
  # @raise [ArgumentError] listing the unknown keys.
  # @return [nil]
  def ensure_no_unknown!(unknown, method_name)
    return if unknown.empty?

    keys = unknown.keys.map(&:to_s).join(", ")
    raise ArgumentError, "unknown #{method_name} option(s): #{keys}"
  end

  # Coerce a vector record id into a JSON-safe value that preserves numeric ids.
  #
  # Integer-valued floats (e.g. `3.0`) are returned as Integers so they
  # serialize as `3` rather than `3.0`. Everything else is returned as given:
  # Integers stay Integers, strings stay strings, and non-integral or
  # non-finite floats stay floats.
  # @param id [Object] vector id.
  # @return [Object] the id, with integer-valued floats converted to Integer.
  def coerce_vector_id(id)
    return id unless id.is_a?(Float)
    # NaN and +/-Infinity have no integer form; Float#to_i raises
    # FloatDomainError on them, so pass them through untouched.
    return id unless id.finite?

    whole = id.to_i
    id == whole ? whole : id
  end

  # Normalize a list of vector records so numeric ids stay numeric.
  # Records are shallow-copied before their id is rewritten; the input is not
  # mutated. Elements that are not Hashes, or have no id key, are returned as is.
  # @param vectors [Array<Hash>, Hash, nil] vector records (a single Hash is
  #   treated as a one-element list).
  # @return [Array<Hash>] records with id values coerced via {coerce_vector_id}.
  def normalize_vectors(vectors)
    # Array(hash) would explode a Hash into [key, value] pairs, so wrap it by hand.
    records = vectors.is_a?(Hash) ? [vectors] : Array(vectors)

    records.map do |vector|
      next vector unless vector.is_a?(Hash)

      # Handle both spellings, including a record that carries both.
      id_keys = [:id, "id"].select { |key| vector.key?(key) }
      next vector if id_keys.empty?

      copy = vector.dup
      id_keys.each { |key| copy[key] = coerce_vector_id(copy[key]) }
      copy
    end
  end
end
|
|
54
|
+
end
|
data/lib/vector_amp.rb
ADDED