vector_amp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +201 -0
- data/NOTICE +8 -0
- data/README.md +377 -0
- data/lib/vector_amp/client.rb +61 -0
- data/lib/vector_amp/connections.rb +49 -0
- data/lib/vector_amp/dataset.rb +182 -0
- data/lib/vector_amp/datasets.rb +237 -0
- data/lib/vector_amp/embedding.rb +67 -0
- data/lib/vector_amp/error.rb +17 -0
- data/lib/vector_amp/ingestion.rb +416 -0
- data/lib/vector_amp/intelligence.rb +101 -0
- data/lib/vector_amp/schedules.rb +81 -0
- data/lib/vector_amp/source.rb +366 -0
- data/lib/vector_amp/transport/base.rb +11 -0
- data/lib/vector_amp/transport/http.rb +149 -0
- data/lib/vector_amp/utils.rb +54 -0
- data/lib/vector_amp/version.rb +5 -0
- data/lib/vector_amp.rb +10 -0
- metadata +150 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require_relative "source"
|
|
6
|
+
require_relative "utils"
|
|
7
|
+
|
|
8
|
+
module VectorAmp
  # Ingestion API resource for sources, jobs, and direct file uploads.
  #
  # Every public method forwards to the injected transport's `request` method
  # and returns whatever the transport returns.
  class IngestionResource
    # @param transport [#request] API transport.
    # @return [IngestionResource]
    def initialize(transport)
      @transport = transport
    end

    # List ingestion sources.
    # @param limit [Integer] page size; defaults to 50.
    # @param offset [Integer] page offset; defaults to 0.
    # @return [Hash] response envelope from the API.
    def list_sources(limit: 50, offset: 0)
      @transport.request(:get, "/ingestion/sources", query: { limit: limit, offset: offset })
    end

    # Fetch an ingestion source.
    # @param source_id [String] source id.
    # @return [Hash] source response.
    def get_source(source_id)
      @transport.request(:get, "/ingestion/sources/#{source_id}")
    end

    # Delete an ingestion source.
    # @param source_id [String] source id.
    # @param force [Boolean] force deletion even if the source is still referenced; sends `?force=true`.
    # @return [Hash] delete response.
    def delete_source(source_id, force: false)
      # A nil query is passed when force is off, so no `force` parameter is sent at all.
      query = force ? { force: true } : nil
      @transport.request(:delete, "/ingestion/sources/#{source_id}", query: query)
    end

    # List sources that are not referenced by any job, schedule, or dataset.
    # @param limit [Integer] page size; defaults to 50.
    # @param offset [Integer] page offset; defaults to 0.
    # @return [Hash] response envelope from the API.
    def list_unused_sources(limit: 50, offset: 0)
      @transport.request(:get, "/ingestion/sources/unused", query: { limit: limit, offset: offset })
    end

    # Delete all unused (unreferenced) sources.
    # @return [Hash] cleanup response.
    def cleanup_unused_sources
      @transport.request(:post, "/ingestion/sources/cleanup")
    end

    # List references (jobs, schedules, datasets) that use a source.
    # @param source_id [String] source id.
    # @return [Hash] references response.
    def source_references(source_id)
      @transport.request(:get, "/ingestion/sources/#{source_id}/references")
    end

    # Validate a source type and config without creating a source.
    # @param source_type [String, Symbol] source type to validate.
    # @param config [Hash] source-specific config to validate.
    # @return [Hash] validation response.
    def validate_source(source_type:, config:)
      @transport.request(:post, "/ingestion/sources/validate", body: { source_type: source_type, config: config })
    end

    # Create an ingestion source from a Source object/hash or explicit options.
    # @param source [Source, Hash, nil] optional source object/hash; when supplied, option fields are ignored.
    # @param source_type [String, Symbol, nil] source type, e.g. `s3`, `gcs`, `web`, `gdrive`, `jira`,
    #   `confluence`, or `file_upload` (the types a default name can be derived for).
    # @param name [String, nil] source name; defaults by source type when omitted.
    # @param config [Hash, nil] source-specific config.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @return [Hash] created source response.
    def create_source(source = nil, source_type: nil, name: nil, config: nil, description: nil, metadata: nil)
      body = source ? source_create_body(source) : source_create_body_from_options(
        source_type: source_type,
        name: name,
        description: description,
        config: config,
        metadata: metadata
      )
      @transport.request(:post, "/ingestion/sources", body: body)
    end

    # Alias for {#create_source}.
    # @return [Hash] created source response.
    def create(source = nil, **options)
      create_source(source, **options)
    end

    # Create a web source.
    # @param start_urls [String, Array<String>] required seed URLs.
    # @param name [String, nil] defaults to `web-<host>` from the first URL, or `web-source`.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional web-source config forwarded to the API.
    # @return [Hash] created source response.
    def create_web(start_urls:, name: nil, description: nil, metadata: nil, **config)
      create_source(WebSource.new(
        name: name,
        start_urls: start_urls,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create an S3 source.
    # @param bucket [String] required S3 bucket name.
    # @param name [String, nil] defaults to `s3-<bucket>` or `s3-<bucket>-<prefix>`.
    # @param prefix [String, nil] optional object prefix.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional S3-source config forwarded to the API.
    # @return [Hash] created source response.
    def create_s3(bucket:, name: nil, prefix: nil, description: nil, metadata: nil, **config)
      create_source(S3Source.new(
        name: name,
        bucket: bucket,
        prefix: prefix,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create a Google Cloud Storage source.
    # @param bucket [String] required GCS bucket name.
    # @param name [String, nil] defaults to `gcs-<bucket>` or `gcs-<bucket>-<prefix>`.
    # @param prefix [String, nil] optional object prefix.
    # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional GCS-source config forwarded to the API.
    # @return [Hash] created source response.
    def create_gcs(bucket:, name: nil, prefix: nil, connection_id: nil, description: nil, metadata: nil, **config)
      create_source(GCSSource.new(
        bucket: bucket,
        name: name,
        prefix: prefix,
        connection_id: connection_id,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create a Jira source.
    # @param cloud_id [String] required cloud/site id (presumably the Atlassian cloud id; confirm against JiraSource).
    # @param name [String, nil] optional source name; any default is chosen by JiraSource (TODO confirm the format).
    # @param access_token [String, nil] optional inline access token.
    # @param project_keys [String, Array<String>, nil] optional project keys to ingest (accepted types presumed; confirm).
    # @param jql [String, nil] optional JQL filter.
    # @param include_comments [Boolean] whether issue comments are included; defaults to true.
    # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional Jira-source config forwarded to JiraSource.
    # @return [Hash] created source response.
    def create_jira(cloud_id:, name: nil, access_token: nil, project_keys: nil, jql: nil, include_comments: true, connection_id: nil, description: nil, metadata: nil, **config)
      create_source(JiraSource.new(
        cloud_id: cloud_id,
        name: name,
        access_token: access_token,
        project_keys: project_keys,
        jql: jql,
        include_comments: include_comments,
        connection_id: connection_id,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create a Google Drive source.
    # @param name [String, nil] defaults to `google-drive-<first id>` or `google-drive-source`.
    # @param folder_ids [String, Array<String>, nil] folder ids to ingest; required if file_ids is empty.
    # @param file_ids [String, Array<String>, nil] file ids to ingest; required if folder_ids is empty.
    # @param auth_mode [String, nil] auth strategy (`service_account`, `oauth`); omitted when nil.
    # @param service_account_json [Hash, String, nil] service-account credentials for `service_account` auth.
    # @param oauth_credentials [Hash, nil] OAuth credentials for `oauth` auth.
    # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional Google Drive-source config forwarded to the API.
    # @return [Hash] created source response.
    def create_google_drive(name: nil, folder_ids: nil, file_ids: nil, auth_mode: nil, service_account_json: nil,
                            oauth_credentials: nil, connection_id: nil, description: nil, metadata: nil, **config)
      create_source(GoogleDriveSource.new(
        name: name,
        folder_ids: folder_ids,
        file_ids: file_ids,
        auth_mode: auth_mode,
        service_account_json: service_account_json,
        oauth_credentials: oauth_credentials,
        connection_id: connection_id,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create a Confluence source.
    # @param cloud_id [String, nil] Atlassian OAuth cloud/site id; required unless base_url is given.
    # @param base_url [String, nil] Confluence base URL, e.g. https://company.atlassian.net.
    # @param name [String, nil] defaults to `confluence-<space>`/`confluence-<host>`/`confluence-source`.
    # @param auth_mode [String] `basic` (default) or `oauth`.
    # @param username [String, nil] username for basic auth.
    # @param api_token [String, nil] API token for basic auth.
    # @param spaces [String, Array<String>, nil] space keys to ingest; empty means all accessible.
    # @param include_attachments [Boolean] include page attachments; defaults to false.
    # @param connection_id [String, nil] optional managed connection id used in place of inline credentials.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param config [Hash] additional Confluence-source config forwarded to the API.
    # @return [Hash] created source response.
    def create_confluence(cloud_id: nil, base_url: nil, name: nil, auth_mode: "basic", username: nil, api_token: nil,
                          spaces: nil, include_attachments: false, connection_id: nil, description: nil, metadata: nil, **config)
      create_source(ConfluenceSource.new(
        cloud_id: cloud_id,
        base_url: base_url,
        name: name,
        auth_mode: auth_mode,
        username: username,
        api_token: api_token,
        spaces: spaces,
        include_attachments: include_attachments,
        connection_id: connection_id,
        description: description,
        metadata: metadata,
        **config
      ))
    end

    # Create a file-upload source.
    # @param name [String, nil] defaults to timestamped `ruby-sdk-file-upload-YYYYmmddHHMMSS`.
    # @param description [String, nil] optional description.
    # @param metadata [Hash, nil] optional metadata.
    # @param storage_provider [String] storage backend; defaults to `s3`.
    # @param sync_mode [String] sync strategy; defaults to `full`.
    # @param config [Hash] additional file-upload config forwarded to the API.
    # @return [Hash] created source response.
    def create_file_upload(name: nil, description: nil, metadata: nil, storage_provider: "s3", sync_mode: "full", **config)
      create_source(FileUploadSource.new(
        name: name,
        description: description,
        metadata: metadata,
        storage_provider: storage_provider,
        sync_mode: sync_mode,
        **config
      ))
    end

    # Start an ingestion job for a source and dataset.
    # @param source_id [String] source id.
    # @param dataset_id [String] target dataset id.
    # @param pipeline_id [String, nil] optional pipeline id.
    # @return [Hash] ingestion job response.
    def start_job(source_id:, dataset_id:, pipeline_id: nil)
      @transport.request(:post, "/ingestion/jobs", body: Utils.compact_hash(
        source_id: source_id,
        dataset_id: dataset_id,
        pipeline_id: pipeline_id
      ))
    end

    # List ingestion jobs.
    # @param dataset_id [String, nil] optional dataset filter.
    # @param limit [Integer] page size; defaults to 50.
    # @param offset [Integer] page offset; defaults to 0.
    # @return [Hash] response envelope from the API.
    def list_jobs(dataset_id: nil, limit: 50, offset: 0)
      @transport.request(:get, "/ingestion/jobs", query: Utils.compact_hash(dataset_id: dataset_id, limit: limit, offset: offset))
    end

    # Fetch an ingestion job.
    # @param job_id [String] job id.
    # @return [Hash] job response.
    def get_job(job_id)
      @transport.request(:get, "/ingestion/jobs/#{job_id}")
    end

    # Retry an eligible failed or cancelled ingestion job as a fresh full rerun.
    # @param job_id [String] original job id.
    # @return [Hash] newly queued retry job response.
    def retry_job(job_id)
      @transport.request(:post, "/ingestion/jobs/#{job_id}/retry")
    end

    # List files attached to an ingestion job.
    # @param job_id [String] job id.
    # @return [Hash] files response.
    def job_files(job_id)
      @transport.request(:get, "/ingestion/jobs/#{job_id}/files")
    end

    # Fetch ingestion job statistics.
    # @param job_id [String] job id.
    # @return [Hash] statistics response.
    def job_statistics(job_id)
      @transport.request(:get, "/ingestion/jobs/#{job_id}/statistics")
    end

    # Cancel an ingestion job.
    # @param job_id [String] job id.
    # @return [Hash] cancel response.
    def cancel_job(job_id)
      # NOTE(review): cancellation is issued as DELETE on the `/cancel` sub-path; confirm against the API spec.
      @transport.request(:delete, "/ingestion/jobs/#{job_id}/cancel")
    end

    # Upload local files by auto-creating a `file_upload` source, initializing presigned uploads, and completing the upload job.
    #
    # Local paths are checked before any API call, so a missing path or a
    # directory fails without leaving a freshly created source behind.
    # @param dataset_id [String] target dataset id; also added to source metadata.
    # @param paths [String, Array<String>] local file paths to upload.
    # @param source_name [String, nil] optional source name; defaults to timestamped Ruby SDK file-upload name.
    # @param description [String, nil] optional source description.
    # @param metadata [Hash] optional source metadata merged with dataset_id.
    # @return [Hash] upload completion/job response.
    # @raise [ArgumentError] when paths is empty, or when the init response does not contain one upload per file.
    # @raise [Errno::ENOENT] when a path does not exist.
    # @raise [Errno::EISDIR] when a path is a directory.
    # @raise [APIError] when a PUT to a presigned URL does not succeed.
    def ingest_files(dataset_id:, paths:, source_name: nil, description: nil, metadata: {})
      files = Array(paths).map { |path| Pathname(path) }
      raise ArgumentError, "paths must not be empty" if files.empty?

      # Fail fast: the same errors would otherwise surface only after the source was created.
      ensure_uploadable!(files)

      source = create_file_upload(
        name: source_name,
        description: description,
        metadata: (metadata || {}).merge(dataset_id: dataset_id)
      )
      # The created-source response may be keyed by strings or symbols.
      source_id = source.fetch("id") { source.fetch(:id) }

      init = init_upload(source_id, files)
      uploads = init.fetch("uploads")
      upload_files_to_presigned_urls(files, uploads)
      job_id = init.fetch("job_id")
      response = complete_upload(source_id, job_id: job_id, file_ids: uploads.map { |upload| upload.fetch("file_id") })
      # Make sure callers can always find the job id on a Hash response.
      response["job_id"] ||= job_id if response.is_a?(Hash)
      response
    end

    # Initialize presigned uploads for source files.
    # @param source_id [String] file-upload source id.
    # @param files [Array<String, Pathname>] local files.
    # @return [Hash] init response containing uploads and job_id.
    def init_upload(source_id, files)
      payload = Array(files).map do |file|
        path = Pathname(file)
        {
          # NOTE(review): the path is sent exactly as given (not its basename); confirm the API expects that.
          name: path.to_s,
          size_bytes: path.size,
          content_type: content_type_for(path)
        }
      end
      @transport.request(:post, "/ingestion/sources/#{source_id}/upload/init", body: { files: payload })
    end

    # Complete a file upload job after files have been PUT to presigned URLs.
    # @param source_id [String] file-upload source id.
    # @param job_id [String] upload job id from {#init_upload}.
    # @param file_ids [Array<String>] file ids from {#init_upload}.
    # @return [Hash] upload completion/job response.
    def complete_upload(source_id, job_id:, file_ids:)
      @transport.request(:post, "/ingestion/sources/#{source_id}/upload/complete", body: { job_id: job_id, file_ids: file_ids })
    end

    private

    # Raise the error a later read would raise, before any remote state is created.
    def ensure_uploadable!(files)
      files.each do |path|
        raise Errno::ENOENT, path.to_s unless path.exist?
        raise Errno::EISDIR, path.to_s if path.directory?
      end
    end

    # Build the create-source request body from a Source-like object or a plain hash.
    def source_create_body(source)
      return source.to_create_body if source.respond_to?(:to_create_body)

      hash = Source.normalize_hash(source)
      source_create_body_from_options(
        source_type: hash["source_type"],
        name: hash["name"],
        description: hash["description"],
        config: hash["config"],
        metadata: hash["metadata"]
      )
    end

    # Build the create-source request body from explicit options, filling in a default name.
    def source_create_body_from_options(source_type:, name:, config:, description: nil, metadata: nil)
      resolved_type = source_type&.to_s
      Utils.compact_hash(
        source_type: resolved_type,
        name: name || default_source_name(resolved_type, config || {}),
        description: description,
        config: config,
        metadata: metadata
      )
    end

    # Derive a default source name for known source types; returns nil for any other type.
    def default_source_name(source_type, config)
      # Config may be keyed by symbols or strings; try the symbol first, then the string.
      lookup = ->(key) { config[key] || config[key.to_s] }

      case source_type
      when "file_upload" then SourceNames.file_upload
      when "web" then SourceNames.web(lookup[:start_urls])
      when "s3" then SourceNames.s3(lookup[:bucket], lookup[:prefix])
      when "gcs" then SourceNames.gcs(lookup[:bucket], lookup[:prefix])
      when "jira" then SourceNames.jira(project_keys: lookup[:project_keys], cloud_id: lookup[:cloud_id])
      when "confluence" then SourceNames.confluence(spaces: lookup[:spaces], cloud_id: lookup[:cloud_id], base_url: lookup[:base_url])
      when "gdrive" then SourceNames.google_drive(folder_ids: lookup[:folder_ids], file_ids: lookup[:file_ids])
      end
    end

    # PUT each file to its presigned URL; files and uploads are paired by position.
    def upload_files_to_presigned_urls(files, uploads)
      # A count mismatch would either hit a nil upload or leave reported file ids without content.
      unless uploads.size == files.size
        raise ArgumentError, "expected #{files.size} presigned uploads, got #{uploads.size}"
      end

      files.zip(uploads).each do |file, upload|
        uri = URI(upload.fetch("upload_url"))
        request = Net::HTTP::Put.new(uri)
        request["Content-Type"] = content_type_for(file)
        request.body = Pathname(file).binread
        response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") { |http| http.request(request) }
        next if response.is_a?(Net::HTTPSuccess)

        raise APIError.new("failed to upload #{file}", status: response.code.to_i, body: response.body, headers: response.to_hash)
      end
    end

    # Map a file extension (case-insensitive) to the Content-Type sent with the upload.
    def content_type_for(path)
      case Pathname(path).extname.downcase
      when ".txt", ".md", ".markdown" then "text/plain"
      when ".json" then "application/json"
      when ".csv" then "text/csv"
      when ".pdf" then "application/pdf"
      when ".html", ".htm" then "text/html"
      else "application/octet-stream"
      end
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "utils"
|
|
4
|
+
|
|
5
|
+
module VectorAmp
  # Intelligence API resource: retrieval-augmented question answering plus
  # conversation sessions and their messages.
  class IntelligenceResource
    # @param transport [#request] API transport.
    # @return [IntelligenceResource]
    def initialize(transport)
      @transport = transport
    end

    # Ask an intelligence query, optionally scoped to a dataset and streamed.
    # @param query [String] natural-language question.
    # @param dataset_id [String, nil] optional dataset id scope.
    # @param top_k [Integer, nil] optional retrieval result count.
    # @param conversation_history [Array<Hash>, nil] optional prior conversation messages.
    # @param include_sources [Boolean, nil] include source chunks/citations when supported.
    # @param stream [Boolean] stream chunks when true; defaults to false.
    # @yieldparam chunk [Object] streamed response chunk when stream is true.
    # @return [Hash, Enumerator, Object] response hash, enumerator without a stream block, or transport stream result.
    def query(query, dataset_id: nil, top_k: nil, conversation_history: nil, include_sources: nil, stream: false, **unknown, &block)
      Utils.ensure_no_unknown!(unknown, "query")

      options = {
        dataset_id: dataset_id,
        top_k: top_k,
        conversation_history: conversation_history,
        include_sources: include_sources,
        stream: stream
      }
      payload = Utils.compact_hash(query: query, **options)
      endpoint = "/intelligence/query"

      # Plain request/response when streaming was not asked for.
      return @transport.request(:post, endpoint, body: payload) unless stream
      # Streaming with a consumer block: hand the block straight to the transport.
      return @transport.request(:post, endpoint, body: payload, stream: true, &block) if block

      # Streaming without a block: return an enumerator that re-enters this method with one.
      enum_for(:query, query, **options.merge(stream: true))
    end

    # Create an intelligence conversation session.
    # @param title [String, nil] optional session title.
    # @param dataset_id [String, nil] optional dataset scope.
    # @param workspace_id [String, nil] optional workspace id.
    # @param metadata [Hash, nil] optional session metadata.
    # @return [Hash] created session.
    def create_session(title: nil, dataset_id: nil, workspace_id: nil, metadata: nil)
      payload = Utils.compact_hash(title: title, dataset_id: dataset_id, workspace_id: workspace_id, metadata: metadata)
      @transport.request(:post, "/intelligence/sessions", body: payload)
    end

    # List intelligence sessions.
    # @param limit [Integer] page size; defaults to 50.
    # @return [Hash] response envelope with `sessions`.
    def list_sessions(limit: 50)
      paging = { limit: limit }
      @transport.request(:get, "/intelligence/sessions", query: paging)
    end

    # Fetch an intelligence session.
    # @param session_id [String] session id.
    # @return [Hash] session resource.
    def get_session(session_id)
      endpoint = "/intelligence/sessions/#{session_id}"
      @transport.request(:get, endpoint)
    end

    # Delete an intelligence session.
    # @param session_id [String] session id.
    # @return [Hash] delete response.
    def delete_session(session_id)
      endpoint = "/intelligence/sessions/#{session_id}"
      @transport.request(:delete, endpoint)
    end

    # Append a message to a session.
    # @param session_id [String] session id.
    # @param role [String] message role: `user`, `assistant`, `system`, or `tool`.
    # @param content [String] message content.
    # @param metadata [Hash, nil] optional message metadata.
    # @return [Hash] created message.
    def append_message(session_id, role:, content:, metadata: nil)
      endpoint = "/intelligence/sessions/#{session_id}/messages"
      payload = Utils.compact_hash(role: role, content: content, metadata: metadata)
      @transport.request(:post, endpoint, body: payload)
    end

    # List messages in a session.
    # @param session_id [String] session id.
    # @param limit [Integer] page size; defaults to 100.
    # @return [Hash] response envelope with `messages`.
    def list_messages(session_id, limit: 100)
      endpoint = "/intelligence/sessions/#{session_id}/messages"
      @transport.request(:get, endpoint, query: { limit: limit })
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module VectorAmp
  # Recurring ingestion schedules. A schedule pairs a source with a target
  # dataset and a cron expression; the ingestion scheduler daemon polls for due
  # schedules and creates jobs as they fire.
  class SchedulesResource
    # @param transport [#request] API transport.
    # @return [SchedulesResource]
    def initialize(transport)
      @transport = transport
    end

    # List schedules.
    # @param limit [Integer] page size; defaults to 50.
    # @param offset [Integer] page offset; defaults to 0.
    # @return [Hash] `{ "schedules" => [...], "total" => Integer, "limit" => Integer, "offset" => Integer }`.
    def list(limit: 50, offset: 0)
      @transport.request(:get, "/ingestion/schedules", query: { limit: limit, offset: offset })
    end

    # Fetch one schedule.
    # @param schedule_id [String]
    # @return [Hash] schedule resource.
    def get(schedule_id)
      @transport.request(:get, "/ingestion/schedules/#{schedule_id}")
    end

    # Create a recurring schedule.
    # @param source_id [String] required.
    # @param dataset_id [String] required.
    # @param cron [String] required 5-field cron expression.
    # @param timezone [String, nil] optional IANA timezone (defaults to UTC server-side).
    # @param pipeline_id [String, nil] optional pipeline id.
    # @param enabled [Boolean, nil] optional flag (defaults to true server-side).
    # @param name [String, nil] optional human-readable name.
    # @param metadata [Hash, nil] optional metadata blob.
    # @return [Hash] created schedule.
    def create(source_id:, dataset_id:, cron:, timezone: nil, pipeline_id: nil, enabled: nil, name: nil, metadata: nil)
      required = {
        "source_id" => source_id,
        "dataset_id" => dataset_id,
        "cron" => cron
      }
      optional = present_fields(
        timezone: timezone,
        pipeline_id: pipeline_id,
        enabled: enabled,
        name: name,
        metadata: metadata
      )
      @transport.request(:post, "/ingestion/schedules", body: required.merge(optional))
    end

    # Update a schedule. Only non-nil fields are sent; `enabled: false` is sent.
    # @param schedule_id [String]
    # @param cron [String, nil] new cron expression.
    # @param timezone [String, nil] new timezone.
    # @param pipeline_id [String, nil] new pipeline id.
    # @param enabled [Boolean, nil] new enabled flag.
    # @param name [String, nil] new human-readable name.
    # @param metadata [Hash, nil] new metadata blob.
    # @return [Hash] updated schedule.
    def update(schedule_id, cron: nil, timezone: nil, pipeline_id: nil, enabled: nil, name: nil, metadata: nil)
      body = present_fields(
        cron: cron,
        timezone: timezone,
        pipeline_id: pipeline_id,
        enabled: enabled,
        name: name,
        metadata: metadata
      )
      @transport.request(:patch, "/ingestion/schedules/#{schedule_id}", body: body)
    end

    # Delete a schedule.
    # @param schedule_id [String]
    # @return [Hash] deletion confirmation envelope.
    def delete(schedule_id)
      @transport.request(:delete, "/ingestion/schedules/#{schedule_id}")
    end

    # Trigger an immediate run for a schedule, outside its cron cadence.
    # @param schedule_id [String]
    # @return [Hash] `{ "job_id" => "..." }` for the new ingestion job.
    def trigger(schedule_id)
      @transport.request(:post, "/ingestion/schedules/#{schedule_id}/trigger")
    end

    private

    # Drop nil-valued fields and convert keys to strings, keeping the given order.
    # Only nil is dropped, so a literal `false` survives.
    def present_fields(**fields)
      fields.compact.transform_keys(&:to_s)
    end
  end
end
|