multiwoven-integrations 0.34.8 → 0.34.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/multiwoven/integrations/core/unstructured_source_connector.rb +17 -1
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/google_drive/client.rb +110 -217
- data/lib/multiwoven/integrations/source/google_drive/config/spec.json +14 -49
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5c72a9da0aeb56e15b4ba607cb2aaa835c0bd82fc35f21acbfa00fd31aa7d0f9
|
|
4
|
+
data.tar.gz: 66a6b214675cd30fb9e7480c17e507f743030240b76fad18f7e87b180ced54ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b1085ca81e3b5701580426bc7046b8d4b8c4d5460ca38f59ca5ff7ec6eb929fa2a60e302aefb2ccc17163e00aaab4d220c591c9a156689fb9391dc4cf9f65c1d
|
|
7
|
+
data.tar.gz: e10ccc0be176977c96a1fbf9c6cda6e0baab3ee9050b6401250f619ed2f72314663ff271e1c78cbdc6be73bd1eedf72891437732d44a5b3e97fcc7e20687bbdf
|
|
@@ -30,8 +30,11 @@ module Multiwoven
|
|
|
30
30
|
source_defined_primary_key: [["element_id"]]
|
|
31
31
|
}.freeze
|
|
32
32
|
|
|
33
|
-
#
|
|
33
|
+
# Data types
|
|
34
34
|
UNSTRUCTURED = "unstructured"
|
|
35
|
+
SEMISTRUCTURED = "semistructured"
|
|
36
|
+
|
|
37
|
+
# Commands for unstructured & semi-structured data operations
|
|
35
38
|
LIST_FILES_CMD = "list_files"
|
|
36
39
|
DOWNLOAD_FILE_CMD = "download_file"
|
|
37
40
|
|
|
@@ -39,6 +42,10 @@ module Multiwoven
|
|
|
39
42
|
connection_config["data_type"] == UNSTRUCTURED
|
|
40
43
|
end
|
|
41
44
|
|
|
45
|
+
def semistructured_data?(connection_config)
|
|
46
|
+
connection_config["data_type"] == SEMISTRUCTURED
|
|
47
|
+
end
|
|
48
|
+
|
|
42
49
|
def create_unstructured_stream
|
|
43
50
|
Multiwoven::Integrations::Protocol::Stream.new(
|
|
44
51
|
name: UNSTRUCTURED,
|
|
@@ -47,6 +54,15 @@ module Multiwoven
|
|
|
47
54
|
**UNSTRUCTURED_STREAM_CONFIG
|
|
48
55
|
)
|
|
49
56
|
end
|
|
57
|
+
|
|
58
|
+
def create_semistructured_stream
|
|
59
|
+
Multiwoven::Integrations::Protocol::Stream.new(
|
|
60
|
+
name: SEMISTRUCTURED,
|
|
61
|
+
action: StreamAction["fetch"],
|
|
62
|
+
json_schema: UNSTRUCTURED_SCHEMA,
|
|
63
|
+
**UNSTRUCTURED_STREAM_CONFIG
|
|
64
|
+
)
|
|
65
|
+
end
|
|
50
66
|
end
|
|
51
67
|
end
|
|
52
68
|
end
|
|
@@ -4,25 +4,36 @@ module Multiwoven::Integrations::Source
|
|
|
4
4
|
module GoogleDrive
|
|
5
5
|
include Multiwoven::Integrations::Core
|
|
6
6
|
|
|
7
|
-
FIELDS = "files(id, name, parents, mimeType), nextPageToken"
|
|
7
|
+
FIELDS = "files(id, name, parents, mimeType, fileExtension, size, createdTime, modifiedTime), nextPageToken"
|
|
8
8
|
MAX_PER_PAGE = 1000
|
|
9
9
|
MIMETYPE_GOOGLE_DRIVE_FOLDER = "mimeType = 'application/vnd.google-apps.folder'"
|
|
10
10
|
|
|
11
|
-
class Client <
|
|
11
|
+
class Client < UnstructuredSourceConnector
|
|
12
12
|
def check_connection(connection_config)
|
|
13
13
|
connection_config = connection_config.with_indifferent_access
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
|
|
15
|
+
if unstructured_data?(connection_config) || semistructured_data?(connection_config)
|
|
16
|
+
create_drive_connection(connection_config)
|
|
17
|
+
else
|
|
18
|
+
create_connection(connection_config)
|
|
19
|
+
end
|
|
16
20
|
success_status
|
|
17
|
-
rescue StandardError => e
|
|
21
|
+
rescue StandardError, NotImplementedError => e
|
|
18
22
|
failure_status(e)
|
|
19
23
|
end
|
|
20
24
|
|
|
21
|
-
def discover(
|
|
22
|
-
|
|
23
|
-
|
|
25
|
+
def discover(connection_config)
|
|
26
|
+
connection_config = connection_config.with_indifferent_access
|
|
27
|
+
streams = if unstructured_data?(connection_config)
|
|
28
|
+
[create_unstructured_stream]
|
|
29
|
+
elsif semistructured_data?(connection_config)
|
|
30
|
+
[create_semistructured_stream]
|
|
31
|
+
else
|
|
32
|
+
raise NotImplementedError, "Discovery failed: Structured data is not supported yet"
|
|
33
|
+
end
|
|
34
|
+
catalog = Catalog.new(streams: streams)
|
|
24
35
|
catalog.to_multiwoven_message
|
|
25
|
-
rescue StandardError => e
|
|
36
|
+
rescue StandardError, NotImplementedError => e
|
|
26
37
|
handle_exception(e, {
|
|
27
38
|
context: "GOOGLE_DRIVE:DISCOVER:EXCEPTION",
|
|
28
39
|
type: "error"
|
|
@@ -31,12 +42,11 @@ module Multiwoven::Integrations::Source
|
|
|
31
42
|
|
|
32
43
|
def read(sync_config)
|
|
33
44
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
rescue StandardError => e
|
|
45
|
+
|
|
46
|
+
return handle_unstructured_data(sync_config) if unstructured_data?(connection_config) || semistructured_data?(connection_config)
|
|
47
|
+
|
|
48
|
+
raise NotImplementedError, "Read failed: Structured data is not supported yet"
|
|
49
|
+
rescue StandardError, NotImplementedError => e
|
|
40
50
|
handle_exception(e, {
|
|
41
51
|
context: "GOOGLE_DRIVE:READ:EXCEPTION",
|
|
42
52
|
type: "error",
|
|
@@ -47,126 +57,103 @@ module Multiwoven::Integrations::Source
|
|
|
47
57
|
|
|
48
58
|
private
|
|
49
59
|
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
offset = 0
|
|
53
|
-
query = query.gsub("\n", " ").gsub(/\s+/, " ")
|
|
54
|
-
limit = query.match(/LIMIT (\d+)/)[1].to_i if query.include? "LIMIT"
|
|
55
|
-
offset = query.match(/OFFSET (\d+)/)[1].to_i if query.include? "OFFSET"
|
|
56
|
-
query = query.match(/\((.*)\) AS/)[1] if query.include? "AS subquery"
|
|
57
|
-
columns = select_columns(query)
|
|
58
|
-
|
|
59
|
-
google_drive_query = build_query(client)
|
|
60
|
-
files = get_files(client, google_drive_query, limit, offset)
|
|
61
|
-
files.map do |file|
|
|
62
|
-
RecordMessage.new(data: prepare_invoice(file, columns), emitted_at: Time.now.to_i).to_multiwoven_message
|
|
63
|
-
end
|
|
60
|
+
def create_connection(connection_config)
|
|
61
|
+
raise NotImplementedError, "Connection failed: Structured data is not supported yet"
|
|
64
62
|
end
|
|
65
63
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
client.get_file(invoice["id"], download_dest: temp_file.path)
|
|
75
|
-
|
|
76
|
-
reader = PDF::Reader.new(temp_file)
|
|
77
|
-
page_count = reader.page_count
|
|
78
|
-
|
|
79
|
-
analysis = if page_count > 1
|
|
80
|
-
start_expense_analysis(invoice["file_name"], temp_file)
|
|
81
|
-
else
|
|
82
|
-
[textract.analyze_expense(document: { bytes: File.binread(temp_file.path) })]
|
|
83
|
-
end
|
|
64
|
+
def create_drive_connection(connection_config)
|
|
65
|
+
credentials = connection_config[:credentials_json]
|
|
66
|
+
@google_drive = Google::Apis::DriveV3::DriveService.new
|
|
67
|
+
@google_drive.authorization = Google::Auth::ServiceAccountCredentials.make_creds(
|
|
68
|
+
json_key_io: StringIO.new(credentials.to_json),
|
|
69
|
+
scope: GOOGLE_SHEETS_SCOPE
|
|
70
|
+
)
|
|
71
|
+
end
|
|
84
72
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
73
|
+
def handle_unstructured_data(sync_config)
|
|
74
|
+
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
|
75
|
+
folder_name = connection_config[:folder_name]
|
|
76
|
+
command = sync_config.model.query.strip
|
|
77
|
+
create_drive_connection(connection_config)
|
|
78
|
+
|
|
79
|
+
case command
|
|
80
|
+
when LIST_FILES_CMD
|
|
81
|
+
list_files_in_folder(folder_name)
|
|
82
|
+
when /^#{DOWNLOAD_FILE_CMD}\s+(.+)$/
|
|
83
|
+
file_name = ::Regexp.last_match(1).strip
|
|
84
|
+
file_name = file_name.gsub(/^["']|["']$/, "") # Remove leading/trailing quotes
|
|
85
|
+
download_file_to_local(file_name, sync_config.sync_id)
|
|
86
|
+
else
|
|
87
|
+
raise ArgumentError, "Invalid command. Supported commands: #{LIST_FILES_CMD}, #{DOWNLOAD_FILE_CMD} <file_path>"
|
|
99
88
|
end
|
|
100
|
-
results
|
|
101
89
|
end
|
|
102
90
|
|
|
103
|
-
def
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
}
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
job_id = resp.job_id
|
|
124
|
-
all_pages = []
|
|
125
|
-
next_token = nil
|
|
126
|
-
|
|
127
|
-
loop do
|
|
128
|
-
result = textract.get_expense_analysis(
|
|
129
|
-
job_id: job_id,
|
|
130
|
-
next_token: next_token
|
|
131
|
-
)
|
|
132
|
-
|
|
133
|
-
status = result.job_status
|
|
134
|
-
if status == "SUCCEEDED"
|
|
135
|
-
all_pages << result
|
|
136
|
-
next_token = result.next_token
|
|
137
|
-
break unless next_token
|
|
138
|
-
elsif %w[FAILED PARTIAL_SUCCESS].include?(status)
|
|
139
|
-
raise "Textract job ended with status: #{status}"
|
|
140
|
-
else
|
|
141
|
-
sleep 2 # still IN_PROGRESS; wait briefly and try again
|
|
142
|
-
end
|
|
91
|
+
def list_files_in_folder(folder_name)
|
|
92
|
+
query = build_query(folder_name)
|
|
93
|
+
records = get_files(@google_drive, query, 10_000, 0)
|
|
94
|
+
records.map do |row|
|
|
95
|
+
RecordMessage.new(
|
|
96
|
+
data: {
|
|
97
|
+
element_id: row.id,
|
|
98
|
+
file_name: row.name,
|
|
99
|
+
file_path: row.name,
|
|
100
|
+
size: row.size,
|
|
101
|
+
file_type: row.file_extension,
|
|
102
|
+
created_date: row.created_time,
|
|
103
|
+
modified_date: row.modified_time,
|
|
104
|
+
text: ""
|
|
105
|
+
},
|
|
106
|
+
emitted_at: Time.now.to_i
|
|
107
|
+
).to_multiwoven_message
|
|
143
108
|
end
|
|
144
|
-
all_pages
|
|
145
109
|
end
|
|
146
110
|
|
|
147
|
-
def
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
111
|
+
def download_file_to_local(file_name, sync_id)
|
|
112
|
+
download_path = ENV["FILE_DOWNLOAD_PATH"]
|
|
113
|
+
file = if download_path
|
|
114
|
+
File.join(download_path, "syncs", sync_id, File.basename(file_name))
|
|
115
|
+
else
|
|
116
|
+
Tempfile.new(["google_drive_file_syncs_#{sync_id}", File.extname(file_name)]).path
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Escape single quotes to prevent query injection
|
|
120
|
+
escaped_name = file_name.gsub("'", "\\\\'")
|
|
121
|
+
query = "mimeType != 'application/vnd.google-apps.folder' and name = '#{escaped_name}'"
|
|
122
|
+
|
|
123
|
+
records = get_files(@google_drive, query, 1, 0)
|
|
124
|
+
raise StandardError, "File not found." if records.empty?
|
|
125
|
+
|
|
126
|
+
@google_drive.get_file(records.first.id, download_dest: file)
|
|
127
|
+
|
|
128
|
+
[RecordMessage.new(
|
|
129
|
+
data: {
|
|
130
|
+
element_id: records.first.id,
|
|
131
|
+
local_path: file,
|
|
132
|
+
file_name: file_name,
|
|
133
|
+
file_path: file_name,
|
|
134
|
+
size: records.first.size,
|
|
135
|
+
file_type: records.first.file_extension,
|
|
136
|
+
created_date: records.first.created_time,
|
|
137
|
+
modified_date: records.first.modified_time,
|
|
138
|
+
text: ""
|
|
139
|
+
},
|
|
140
|
+
emitted_at: Time.now.to_i
|
|
141
|
+
).to_multiwoven_message]
|
|
142
|
+
rescue StandardError => e
|
|
143
|
+
raise StandardError, "Failed to download file #{file_name}: #{e.message}"
|
|
144
|
+
end
|
|
154
145
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
end
|
|
146
|
+
def build_query(folder_name)
|
|
147
|
+
raise ArgumentError, "Folder name is required" if folder_name.blank?
|
|
158
148
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
parents_query = "(#{subfolders_ids.join(" in parents or ")} in parents)"
|
|
165
|
-
end
|
|
149
|
+
# Escape single quotes to prevent query injection
|
|
150
|
+
escaped_folder = folder_name.gsub("'", "\\\\'")
|
|
151
|
+
folder_query = "#{MIMETYPE_GOOGLE_DRIVE_FOLDER} and (name = '#{escaped_folder}')"
|
|
152
|
+
response = @google_drive.list_files(include_items_from_all_drives: true, supports_all_drives: true, q: folder_query, fields: FIELDS)
|
|
153
|
+
raise ArgumentError, "Specified folder does not exist" if response.files.empty?
|
|
166
154
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
query
|
|
155
|
+
parent_id = response.files.first.id
|
|
156
|
+
"'#{parent_id}' in parents"
|
|
170
157
|
end
|
|
171
158
|
|
|
172
159
|
def get_files(client, query, limit, offset)
|
|
@@ -193,100 +180,6 @@ module Multiwoven::Integrations::Source
|
|
|
193
180
|
|
|
194
181
|
result
|
|
195
182
|
end
|
|
196
|
-
|
|
197
|
-
def select_columns(query)
|
|
198
|
-
columns = query.match(/SELECT (.*) FROM/)[1]
|
|
199
|
-
all_columns = %w[line_items id file_name exception results] + TEXTRACT_SUMMARY_FIELDS.keys
|
|
200
|
-
@options[:fields] = all_columns if @options[:fields].empty?
|
|
201
|
-
|
|
202
|
-
return @options[:fields] if columns.include?("*")
|
|
203
|
-
|
|
204
|
-
columns = columns.split(",").map(&:strip)
|
|
205
|
-
raise "Column(s) #{(columns - all_columns).join(", ")} not valid." if (columns - all_columns).length.positive?
|
|
206
|
-
|
|
207
|
-
columns & all_columns
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
def prepare_invoice(file, columns)
|
|
211
|
-
invoice = {}
|
|
212
|
-
columns.each { |column| invoice[column] = "" if TEXTRACT_SUMMARY_FIELDS.key?(column) }
|
|
213
|
-
invoice["line_items"] = [] if columns.any?("line_items")
|
|
214
|
-
invoice["id"] = file.id if columns.any?("id")
|
|
215
|
-
invoice["file_name"] = file.name if columns.any?("file_name")
|
|
216
|
-
invoice["exception"] = "" if columns.any?("exception")
|
|
217
|
-
invoice["results"] = {} if columns.any?("results")
|
|
218
|
-
invoice
|
|
219
|
-
end
|
|
220
|
-
|
|
221
|
-
def create_connection(connection_config)
|
|
222
|
-
@options = connection_config[:options]
|
|
223
|
-
credentials = connection_config[:credentials_json]
|
|
224
|
-
client = Google::Apis::DriveV3::DriveService.new
|
|
225
|
-
client.authorization = Google::Auth::ServiceAccountCredentials.make_creds(
|
|
226
|
-
json_key_io: StringIO.new(credentials.to_json),
|
|
227
|
-
scope: GOOGLE_SHEETS_SCOPE
|
|
228
|
-
)
|
|
229
|
-
client
|
|
230
|
-
end
|
|
231
|
-
|
|
232
|
-
# TODO: Refactor (extract) code for Amazon Textract
|
|
233
|
-
def create_aws_credentials
|
|
234
|
-
access_key_id = ENV["TEXTRACT_ACCESS_KEY_ID"]
|
|
235
|
-
secret_access_key = ENV["TEXTRACT_SECRET_ACCESS_KEY"]
|
|
236
|
-
Aws::Credentials.new(access_key_id, secret_access_key)
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
def create_aws_textract_connection
|
|
240
|
-
region = ENV["TEXTRACT_REGION"]
|
|
241
|
-
credentials = create_aws_credentials
|
|
242
|
-
Aws::Textract::Client.new(region: region, credentials: credentials)
|
|
243
|
-
end
|
|
244
|
-
|
|
245
|
-
def create_aws_s3_connection
|
|
246
|
-
region = ENV["TEXTRACT_REGION"]
|
|
247
|
-
credentials = create_aws_credentials
|
|
248
|
-
Aws::S3::Client.new(region: region, credentials: credentials)
|
|
249
|
-
end
|
|
250
|
-
|
|
251
|
-
def extract_invoice_data(invoice, results)
|
|
252
|
-
invoice = extract_summary_fields(invoice, results)
|
|
253
|
-
invoice = extract_line_items(invoice, results)
|
|
254
|
-
invoice["results"] = results.to_json if invoice.key?("results")
|
|
255
|
-
invoice.transform_keys(&:to_sym)
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
def extract_summary_fields(invoice, results)
|
|
259
|
-
document = results[0].expense_documents[0]
|
|
260
|
-
(invoice.keys & TEXTRACT_SUMMARY_FIELDS.keys).each do |key|
|
|
261
|
-
invoice[key] = extract_field_value(document.summary_fields, TEXTRACT_SUMMARY_FIELDS[key])
|
|
262
|
-
end
|
|
263
|
-
invoice
|
|
264
|
-
end
|
|
265
|
-
|
|
266
|
-
def extract_line_items(invoice, results)
|
|
267
|
-
if invoice.key?("line_items")
|
|
268
|
-
results.each do |result|
|
|
269
|
-
result.expense_documents.each do |expense_document|
|
|
270
|
-
expense_document.line_item_groups.each do |line_item_group|
|
|
271
|
-
line_item_group.line_items.each do |line_item|
|
|
272
|
-
extracted_line_item = {}
|
|
273
|
-
TEXTRACT_LINE_ITEMS_FIELDS.each_key do |key|
|
|
274
|
-
extracted_line_item[key] = extract_field_value(line_item.line_item_expense_fields, TEXTRACT_LINE_ITEMS_FIELDS[key])
|
|
275
|
-
end
|
|
276
|
-
invoice["line_items"] << extracted_line_item
|
|
277
|
-
end
|
|
278
|
-
end
|
|
279
|
-
end
|
|
280
|
-
end
|
|
281
|
-
end
|
|
282
|
-
invoice["line_items"] = invoice["line_items"].to_json
|
|
283
|
-
invoice
|
|
284
|
-
end
|
|
285
|
-
|
|
286
|
-
def extract_field_value(fields, selector)
|
|
287
|
-
selected_field = fields.select { |field| field.type.text == selector }.first
|
|
288
|
-
selected_field ? selected_field.value_detection.text : ""
|
|
289
|
-
end
|
|
290
183
|
end
|
|
291
184
|
end
|
|
292
185
|
end
|
|
@@ -8,6 +8,16 @@
|
|
|
8
8
|
"type": "object",
|
|
9
9
|
"required": ["credentials_json"],
|
|
10
10
|
"properties": {
|
|
11
|
+
"data_type": {
|
|
12
|
+
"description": "Type of data in files",
|
|
13
|
+
"type": "string",
|
|
14
|
+
"title": "Data Format Type",
|
|
15
|
+
"oneOf": [
|
|
16
|
+
{ "const": "structured", "title": "Tables & Records (Structured)" },
|
|
17
|
+
{ "const": "unstructured", "title": "Documents & Files (Unstructured)" },
|
|
18
|
+
{ "const": "semistructured", "title": "Invoices, Receipts, Tables, Forms (Semi-structured)" }
|
|
19
|
+
]
|
|
20
|
+
},
|
|
11
21
|
"credentials_json": {
|
|
12
22
|
"type": "object",
|
|
13
23
|
"description": "You can get the keys from the Google Cloud web console. First, go to the IAM page and select Service Accounts from the left menu. Next, locate your service account in the list, click on its Keys tab, and then click Add Key. Lastly, click Create new key and select JSON.",
|
|
@@ -67,55 +77,10 @@
|
|
|
67
77
|
"universe_domain"
|
|
68
78
|
]
|
|
69
79
|
},
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"
|
|
73
|
-
"title": "
|
|
74
|
-
"required": ["document_type"],
|
|
75
|
-
"properties": {
|
|
76
|
-
"folder": {
|
|
77
|
-
"type": "string",
|
|
78
|
-
"description": "When specified, reads files contained in the folder.",
|
|
79
|
-
"title": "Folder"
|
|
80
|
-
},
|
|
81
|
-
"subfolders": {
|
|
82
|
-
"type": "boolean",
|
|
83
|
-
"default": false,
|
|
84
|
-
"title": "Read from subfolders"
|
|
85
|
-
},
|
|
86
|
-
"file_type": {
|
|
87
|
-
"description": "The type of file to read",
|
|
88
|
-
"type": "string",
|
|
89
|
-
"title": "File Type",
|
|
90
|
-
"enum": [
|
|
91
|
-
"application/pdf"
|
|
92
|
-
]
|
|
93
|
-
},
|
|
94
|
-
"document_type": {
|
|
95
|
-
"type": "string",
|
|
96
|
-
"enum": [ "invoices" ]
|
|
97
|
-
},
|
|
98
|
-
"fields": {
|
|
99
|
-
"type": "array",
|
|
100
|
-
"description": "Leave blank to extract all fields.",
|
|
101
|
-
"items": {
|
|
102
|
-
"type": "string",
|
|
103
|
-
"anyOf": [
|
|
104
|
-
{ "const": "id", "title": "File Id" },
|
|
105
|
-
{ "const": "file_name", "title": "File Name" },
|
|
106
|
-
{ "const": "exception", "title": "Parsing Exception" },
|
|
107
|
-
{ "const": "invoice_number", "title": "Invoice/Receipt Number" },
|
|
108
|
-
{ "const": "invoice_date", "title": "Invoice/Receipt Date" },
|
|
109
|
-
{ "const": "invoice_total", "title": "Invoice/Receipt Total" },
|
|
110
|
-
{ "const": "purchase_order", "title": "Purchase Order Number" },
|
|
111
|
-
{ "const": "line_items", "title": "Invoice Line Items (Code, Description, Quantity, Unit Price, Total Price)"},
|
|
112
|
-
{ "const": "vendor_name", "title": "Vendor Name" },
|
|
113
|
-
{ "const": "results", "title": "Parsing Results" }
|
|
114
|
-
]
|
|
115
|
-
},
|
|
116
|
-
"uniqueItems": true
|
|
117
|
-
}
|
|
118
|
-
}
|
|
80
|
+
"folder_name": {
|
|
81
|
+
"description": "Name of folder to read files from",
|
|
82
|
+
"type": "string",
|
|
83
|
+
"title": "Folder Name"
|
|
119
84
|
}
|
|
120
85
|
}
|
|
121
86
|
}
|