fluent-plugin-bigquery 0.2.15 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -18
- data/fluent-plugin-bigquery.gemspec +3 -2
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +148 -57
- data/test/helper.rb +0 -1
- data/test/plugin/test_out_bigquery.rb +109 -0
- metadata +19 -8
- data/lib/fluent/plugin/bigquery/load_request_body_wrapper.rb +0 -173
- data/test/test_load_request_body_wrapper.rb +0 -190
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6283655314f920c8d3f1bab8f387d96c6fe79da0
+  data.tar.gz: f1016e03203cf12c4c26ad62f1c3a05926423fa7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15e484f5df810cd5736711bd70df5a9e34950e10c77118a3b6097fba6f9c1efd9641ac515df547a15b2f9dac653deb3d5b2fa665541a47bf43dba750754d584e
+  data.tar.gz: 1bbcea1f4ec490c69028eca66032b6dca734231fedf431951618b1d5ad08a354395f357e31a028f3e60531ec15e831bbbb0d4f9a70bd48082a57562885564023
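A gem's checksums.yaml records digests of the archive's internal members (metadata.gz and data.tar.gz). A minimal verification sketch in Ruby, assuming the downloaded `.gem` (a plain tar archive) has already been unpacked into the current directory; the filename is illustrative:

```ruby
require 'digest'

# Compare the SHA1 published above for data.tar.gz with a locally computed digest.
# Assumes: `tar xf fluent-plugin-bigquery-0.2.16.gem` was run here, producing
# metadata.gz, data.tar.gz and checksums.yaml.gz.
expected = "f1016e03203cf12c4c26ad62f1c3a05926423fa7"
actual   = Digest::SHA1.file("data.tar.gz").hexdigest
puts(actual == expected ? "data.tar.gz checksum OK" : "checksum MISMATCH")
```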
data/README.md
CHANGED
@@ -5,7 +5,7 @@
 * insert data over streaming inserts
   * for continuous real-time insertions
   * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
-*
+* load data
   * for data loading as batch jobs, for big amount of data
   * https://developers.google.com/bigquery/loading-data-into-bigquery
 
@@ -20,7 +20,7 @@ Configure insert specifications with target table schema, with your credentials.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   method insert # default
 
@@ -47,7 +47,7 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   method insert # default
 
@@ -106,6 +106,37 @@ Important options for high rate events are:
 See [Quota policy](https://cloud.google.com/bigquery/streaming-data-into-bigquery#quota)
 section in the Google BigQuery document.
 
+### Load
+```apache
+<match bigquery>
+  @type bigquery
+
+  method load
+  buffer_type file
+  buffer_path bigquery.*.buffer
+  flush_interval 1800
+  flush_at_shutdown true
+  try_flush_interval 1
+  utc
+
+  auth_method json_key
+  json_key json_key_path.json
+
+  time_format %s
+  time_field time
+
+  project yourproject_id
+  dataset yourdataset_id
+  auto_create_table true
+  table yourtable%{time_slice}
+  schema_path bq_schema.json
+</match>
+```
+
+I recommend using a file buffer and a long flush interval.
+
+__CAUTION: the `flush_interval` default is still `0.25` even when `method` is `load` in the current version.__
+
 ### Authentication
 
 There are two methods supported to fetch access token for the service account.
@@ -127,7 +158,7 @@ download its JSON key and deploy the key with fluentd.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method json_key
   json_key /home/username/.keys/00000000000000000000000000000000-jsonkey.json
@@ -144,7 +175,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method json_key
   json_key {"private_key": "-----BEGIN PRIVATE KEY-----\n...", "client_email": "xxx@developer.gserviceaccount.com"}
@@ -165,7 +196,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method compute_engine
 
@@ -198,6 +229,7 @@ In this authentication method, the credentials returned are determined by the en
 
 ### Table id formatting
 
+#### strftime formatting
 `table` and `tables` options accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
 format to construct table ids.
 Table ids are formatted at runtime
@@ -208,7 +240,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -220,8 +252,11 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+#### record attribute formatting
 The format can be suffixed with attribute name.
 
+__NOTE: This feature is available only if `method` is `insert`, because it has a performance impact. Use `%{time_slice}` instead where possible.__
+
 ```apache
 <match dummy>
   ...
@@ -233,23 +268,39 @@ The format can be suffixed with attribute name.
 If attribute name is given, the time to be used for formatting is value of each row.
 The value for the time should be a UNIX time.
 
+#### time_slice_key formatting
 Or, the options can use `%{time_slice}` placeholder.
 `%{time_slice}` is replaced by formatted time slice key at runtime.
 
 ```apache
 <match dummy>
-  type bigquery
-
+  @type bigquery
+
   ...
-
-  project yourproject_id
-  dataset yourdataset_id
   table accesslog%{time_slice}
-
   ...
 </match>
 ```
 
+#### record attribute value formatting
+Alternatively, the `${attr_name}` placeholder lets you use the value of an attribute as part of the table id.
+`${attr_name}` is replaced by the string value of the attribute named `attr_name`.
+
+__NOTE: This feature is available only if `method` is `insert`.__
+
+```apache
+<match dummy>
+  ...
+  table accesslog_%Y_%m_${subdomain}
+  ...
+</match>
+```
+
+For example, if the value of the `subdomain` attribute is `"bq.fluent"`, the table id will be something like "accesslog_2016_03_bqfluent".
+
+- any type of attribute is allowed, because the stringified value is used as the replacement.
+- acceptable characters are alphabets, digits and `_`; all other characters are removed.
+
 ### Dynamic table creating
 
 When `auto_create_table` is set to `true`, try to create the table using BigQuery API when insertion failed with code=404 "Not Found: Table ...".
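A short Ruby sketch of how these placeholders combine at runtime may help; it mirrors the substitution added to `generate_table_id` further down in this diff. The helper name, the record hash and its values are illustrative, not part of the plugin's API:

```ruby
require 'time'

# Illustrative only: ${attr} is replaced by the stringified attribute value with
# every non-word character stripped (as in the gsub added to generate_table_id),
# then the remaining strftime escapes are expanded from the event time.
def expand_table_id(format, time, record)
  expanded = format.gsub(/\$\{\s*(\w+)\s*\}/) { record[$1].to_s.gsub(/[^\w]/, '') }
  time.strftime(expanded)
end

expand_table_id('accesslog_%Y_%m_${subdomain}', Time.utc(2016, 3, 16), 'subdomain' => 'bq.fluent')
# => "accesslog_2016_03_bqfluent"
```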
@@ -259,7 +310,7 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -283,7 +334,7 @@ you can also specify nested fields by prefixing their belonging record fields.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -322,7 +373,7 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -339,7 +390,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -363,7 +414,7 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
   spec.description = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
   spec.summary = %q{Fluentd plugin to store data on Google BigQuery}
   spec.homepage = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
-  spec.license = "
+  spec.license = "Apache-2.0"
 
   spec.files = `git ls-files`.split($/)
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
@@ -23,9 +23,10 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "test-unit", "~> 3.0.2"
   spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
 
-  spec.add_runtime_dependency "google-api-client", "~> 0.9.
+  spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
   spec.add_runtime_dependency "googleauth", ">= 0.5.0"
   spec.add_runtime_dependency "multi_json"
+  spec.add_runtime_dependency "activesupport", ">= 3.2"
   spec.add_runtime_dependency "fluentd"
   spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
   spec.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
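The new `activesupport` runtime dependency appears to back the `Hash#deep_symbolize_keys` call used in the rewritten write path further down in this diff. A minimal sketch of what that core extension does (the sample hash is illustrative):

```ruby
require 'active_support/core_ext/hash'

# deep_symbolize_keys converts string keys to symbols at every nesting level,
# which is how buffered msgpack rows are normalized before grouping/inserting.
row = { "json" => { "uuid" => "9ABFF756", "nested" => { "a" => 1 } } }
row.deep_symbolize_keys
# => {:json=>{:uuid=>"9ABFF756", :nested=>{:a=>1}}}
```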
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -92,7 +92,7 @@ module Fluent
 
     config_param :insert_id_field, :string, default: nil
 
-    config_param :method, :string, default: 'insert' # or 'load'
+    config_param :method, :string, default: 'insert' # or 'load'
 
     config_param :load_size_limit, :integer, default: 1000**4 # < 1TB (1024^4) # TODO: not implemented now
     ### method: 'load'
@@ -150,6 +150,14 @@ module Fluent
     def configure(conf)
       super
 
+      if @method == "insert"
+        extend(InsertImplementation)
+      elsif @method == "load"
+        extend(LoadImplementation)
+      else
+        raise Fluend::ConfigError "'method' must be 'insert' or 'load'"
+      end
+
       case @auth_method
       when 'private_key'
         unless @email && @private_key_path
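The new configure logic selects the write path by extending the output instance with one of two modules at configuration time. A standalone sketch of that dispatch pattern; the class and module names here are illustrative, not part of the plugin:

```ruby
# Minimal sketch of configure-time dispatch via Object#extend: the module chosen
# at configure time supplies the per-method behaviour for this instance only.
module InsertPath
  def write_path; "streaming insert"; end
end

module LoadPath
  def write_path; "load job"; end
end

class FakeOutput
  def configure(method)
    case method
    when "insert" then extend(InsertPath)
    when "load"   then extend(LoadPath)
    else raise ArgumentError, "'method' must be 'insert' or 'load'"
    end
  end
end

out = FakeOutput.new
out.configure("load")
puts out.write_path  # => "load job"
```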
@@ -286,6 +294,12 @@ module Fluent
               else
                 current_time
               end
+      if row && format =~ /\$\{/
+        json = row[:json]
+        format.gsub!(/\$\{\s*(\w+)\s*\}/) do |m|
+          row[:json][$1.to_sym].to_s.gsub(/[^\w]/, '')
+        end
+      end
       table_id = time.strftime(format)
 
       if chunk
@@ -321,29 +335,6 @@ module Fluent
       raise "failed to create table in bigquery" # TODO: error class
     end
 
-    def insert(table_id, rows)
-      client.insert_all_table_data(@project, @dataset, table_id, {
-        rows: rows
-      }, {})
-    rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-      # api_error? -> client cache clear
-      @cached_client = nil
-
-      message = e.message
-      if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ message.to_s
-        # Table Not Found: Auto Create Table
-        create_table(table_id)
-        raise "table created. send rows next time."
-      end
-      log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
-      raise "failed to insert into bigquery" # TODO: error class
-    end
-
-    def load
-      # https://developers.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest
-      raise NotImplementedError # TODO
-    end
-
     def replace_record_key(record)
       new_record = {}
       record.each do |key, _|
@@ -366,44 +357,13 @@ module Fluent
       record
     end
 
-    def format(tag, time, record)
-      buf = ''
-
-      if @replace_record_key
-        record = replace_record_key(record)
-      end
-
-      if @convert_hash_to_json
-        record = convert_hash_to_json(record)
-      end
-
-      row = @fields.format(@add_time_field.call(record, time))
-      unless row.empty?
-        row = {"json" => row}
-        row['insert_id'] = @get_insert_id.call(record) if @get_insert_id
-        buf << row.to_msgpack
-      end
-      buf
-    end
-
     def write(chunk)
-
-      chunk.msgpack_each do |row_object|
-        # TODO: row size limit
-        rows << row_object.deep_symbolize_keys
-      end
-
-      # TODO: method
-
-      insert_table_format = @tables_mutex.synchronize do
+      table_id_format = @tables_mutex.synchronize do
         t = @tables_queue.shift
         @tables_queue.push t
         t
       end
-
-      rows.group_by {|row| generate_table_id(insert_table_format, Time.at(Fluent::Engine.now), row, chunk) }.each do |table_id, rows|
-        insert(table_id, rows)
-      end
+      _write(chunk, table_id_format)
     end
 
     def fetch_schema
@@ -422,6 +382,137 @@ module Fluent
       raise "failed to fetch schema from bigquery" # TODO: error class
     end
 
+    module InsertImplementation
+      def format(tag, time, record)
+        buf = ''
+
+        if @replace_record_key
+          record = replace_record_key(record)
+        end
+
+        if @convert_hash_to_json
+          record = convert_hash_to_json(record)
+        end
+
+        row = @fields.format(@add_time_field.call(record, time))
+        unless row.empty?
+          row = {"json" => row}
+          row['insert_id'] = @get_insert_id.call(record) if @get_insert_id
+          buf << row.to_msgpack
+        end
+        buf
+      end
+
+      def _write(chunk, table_format)
+        rows = []
+        chunk.msgpack_each do |row_object|
+          # TODO: row size limit
+          rows << row_object.deep_symbolize_keys
+        end
+
+        rows.group_by {|row| generate_table_id(table_format, Time.at(Fluent::Engine.now), row, chunk) }.each do |table_id, group|
+          insert(table_id, group)
+        end
+      end
+
+      def insert(table_id, rows)
+        client.insert_all_table_data(@project, @dataset, table_id, {
+          rows: rows
+        }, {})
+      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+        # api_error? -> client cache clear
+        @cached_client = nil
+
+        message = e.message
+        if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ message.to_s
+          # Table Not Found: Auto Create Table
+          create_table(table_id)
+          raise "table created. send rows next time."
+        end
+        log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
+        raise "failed to insert into bigquery" # TODO: error class
+      end
+    end
+
+    module LoadImplementation
+      def format(tag, time, record)
+        buf = ''
+
+        if @replace_record_key
+          record = replace_record_key(record)
+        end
+        row = @fields.format(@add_time_field.call(record, time))
+        unless row.empty?
+          buf << MultiJson.dump(row) + "\n"
+        end
+        buf
+      end
+
+      def _write(chunk, table_id_format)
+        table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil, chunk)
+        load(chunk, table_id)
+      end
+
+      def load(chunk, table_id)
+        res = nil
+        create_upload_source(chunk) do |upload_source|
+          res = client.insert_job(@project, {
+            configuration: {
+              load: {
+                destination_table: {
+                  project_id: @project,
+                  dataset_id: @dataset,
+                  table_id: table_id,
+                },
+                schema: {
+                  fields: @fields.to_a,
+                },
+                write_disposition: "WRITE_APPEND",
+                source_format: "NEWLINE_DELIMITED_JSON"
+              }
+            }
+          }, {upload_source: upload_source, content_type: "application/octet-stream"})
+        end
+        wait_load(res, table_id)
+      end
+
+      private
+
+      def wait_load(res, table_id)
+        wait_interval = 10
+        _response = res
+        until _response.status.state == "DONE"
+          log.debug "wait for load job finish", state: _response.status.state
+          sleep wait_interval
+          _response = client.get_job(@project, _response.job_reference.job_id)
+        end
+
+        if _response.status.error_result
+          log.error "job.insert API", project_id: @project, dataset: @dataset, table: table_id, message: _response.status.error_result.message
+          raise "failed to load into bigquery"
+        end
+
+        log.debug "finish load job", state: _response.status.state
+      end
+
+      def create_upload_source(chunk)
+        chunk_is_file = @buffer_type == 'file'
+        if chunk_is_file
+          File.open(chunk.path) do |file|
+            yield file
+          end
+        else
+          Tempfile.open("chunk-tmp") do |file|
+            file.binmode
+            chunk.write_to(file)
+            file.sync
+            file.rewind
+            yield file
+          end
+        end
+      end
+    end
+
     class FieldSchema
       def initialize(name, mode = :nullable)
         unless [:nullable, :required, :repeated].include?(mode)
data/test/helper.rb
CHANGED
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -710,6 +710,35 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MessagePack.unpack(buf)
   end
 
+  def test_format_for_load
+    now = Time.now
+    input = [
+      now,
+      {
+        "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
+      }
+    ]
+    expected = MultiJson.dump({
+      "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
+    }) + "\n"
+
+    driver = create_driver(<<-CONFIG)
+      method load
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      field_string uuid
+    CONFIG
+    driver.instance.start
+    buf = driver.instance.format_stream("my.tag", [input])
+    driver.instance.shutdown
+
+    assert_equal expected, buf
+  end
+
   def test_empty_value_in_required
     now = Time.now
     input = [
@@ -857,6 +886,66 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver.instance.shutdown
   end
 
+  def test_write_for_load
+    schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
+    entry = {a: "b"}, {b: "c"}
+    driver = create_driver(<<-CONFIG)
+      method load
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field time
+
+      schema_path #{schema_path}
+      field_integer time
+    CONFIG
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
+      h[0][:type] = "INTEGER"
+      h[0][:mode] = "NULLABLE"
+    end
+
+    chunk = Fluent::MemoryBufferChunk.new("my.tag")
+    io = StringIO.new("hello")
+    mock(driver.instance).create_upload_source(chunk).yields(io)
+    mock_client(driver) do |expect|
+      expect.insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON"
+          }
+        }
+      }, {upload_source: io, content_type: "application/octet-stream"}) {
+        s = stub!
+        status_stub = stub!
+        s.status { status_stub }
+        status_stub.state { "DONE" }
+        status_stub.error_result { nil }
+        s
+      }
+    end
+
+    entry.each do |e|
+      chunk << MultiJson.dump(e) + "\n"
+    end
+
+    driver.instance.start
+    driver.instance.write(chunk)
+    driver.instance.shutdown
+  end
+
   def test_write_with_row_based_table_id_formatting
     entry = [
       {json: {a: "b", created_at: Time.local(2014,8,20,9,0,0).to_i}},
@@ -935,6 +1024,26 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal 'foo_20140811', table_id
   end
 
+  def test_generate_table_id_with_attribute_replacement
+    driver = create_driver
+    table_id_format = 'foo_%Y_%m_%d_${baz}'
+    current_time = Time.now
+    time = Time.local(2014, 8, 11, 21, 20, 56)
+    [
+      [ { baz: 1234 }, 'foo_2014_08_11_1234' ],
+      [ { baz: 'piyo' }, 'foo_2014_08_11_piyo' ],
+      [ { baz: true }, 'foo_2014_08_11_true' ],
+      [ { baz: nil }, 'foo_2014_08_11_' ],
+      [ { baz: '' }, 'foo_2014_08_11_' ],
+      [ { baz: "_X-Y.Z !\n" }, 'foo_2014_08_11__XYZ' ],
+      [ { baz: { xyz: 1 } }, 'foo_2014_08_11_xyz1' ],
+    ].each do |attrs, expected|
+      row = { json: { created_at: Time.local(2014,8,10,21,20,57).to_i }.merge(attrs) }
+      table_id = driver.instance.generate_table_id(table_id_format, time, row)
+      assert_equal expected, table_id
+    end
+  end
+
   def test_auto_create_table_by_bigquery_api
     now = Time.now
     message = {
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.2.
+  version: 0.2.16
 platform: ruby
 authors:
 - Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-03-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -72,14 +72,14 @@ dependencies:
     requirements:
     - - "~>"
     - !ruby/object:Gem::Version
-      version: 0.9.
+      version: 0.9.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
    - - "~>"
    - !ruby/object:Gem::Version
-      version: 0.9.
+      version: 0.9.3
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +108,20 @@ dependencies:
   - - ">="
   - !ruby/object:Gem::Version
     version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '3.2'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '3.2'
 - !ruby/object:Gem::Dependency
   name: fluentd
   requirement: !ruby/object:Gem::Requirement
@@ -193,7 +207,6 @@ files:
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
-- lib/fluent/plugin/bigquery/load_request_body_wrapper.rb
 - lib/fluent/plugin/bigquery/version.rb
 - lib/fluent/plugin/out_bigquery.rb
 - test/helper.rb
@@ -201,10 +214,9 @@ files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/json_key.json
 - test/plugin/testdata/sudo.schema
-- test/test_load_request_body_wrapper.rb
 homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
--
+- Apache-2.0
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -232,4 +244,3 @@ test_files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/json_key.json
 - test/plugin/testdata/sudo.schema
-- test/test_load_request_body_wrapper.rb
data/lib/fluent/plugin/bigquery/load_request_body_wrapper.rb
DELETED
@@ -1,173 +0,0 @@
-module Fluent
-  module BigQueryPlugin
-    class LoadRequestBodyWrapper
-      # body can be a instance of IO (#rewind, #read, #to_str)
-      # http://rubydoc.info/github/google/google-api-ruby-client/Google/APIClient/Request#body-instance_method
-
-      # http://rubydoc.info/github/google/google-api-ruby-client/Google/APIClient#execute-instance_method
-      # (Google::APIClient::Method) api_method: The method object or the RPC name of the method being executed.
-      # (Hash, Array) parameters: The parameters to send to the method.
-      # (String) body: The body of the request.
-      # (Hash, Array) headers: The HTTP headers for the request.
-      # (Hash) options: A set of options for the request, of which:
-      #   (#generate_authenticated_request) :authorization (default: true)
-      #    - The authorization mechanism for the response. Used only if :authenticated is true.
-      #   (TrueClass, FalseClass) :authenticated (default: true)
-      #    - true if the request must be signed or somehow authenticated, false otherwise.
-      #   (TrueClass, FalseClass) :gzip (default: true) - true if gzip enabled, false otherwise.
-
-      # https://developers.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest
-
-      JSON_PRETTY_DUMP = JSON::State.new(space: " ", indent:" ", object_nl:"\n", array_nl:"\n")
-
-      CONTENT_TYPE_FIRST = "Content-Type: application/json; charset=UTF-8\n\n"
-      CONTENT_TYPE_SECOND = "Content-Type: application/octet-stream\n\n"
-
-      MULTIPART_BOUNDARY = "--xxx\n"
-      MULTIPART_BOUNDARY_END = "--xxx--\n"
-
-      def initialize(project_id, dataset_id, table_id, field_defs, buffer)
-        @metadata = {
-          configuration: {
-            load: {
-              sourceFormat: "<required for JSON files>",
-              schema: {
-                fields: field_defs
-              },
-              destinationTable: {
-                projectId: project_id,
-                datasetId: dataset_id,
-                tableId: table_id
-              }
-            }
-          }
-        }
-
-        @non_buffer = MULTIPART_BOUNDARY + CONTENT_TYPE_FIRST + @metadata.to_json(JSON_PRETTY_DUMP) + "\n" +
-                      MULTIPART_BOUNDARY + CONTENT_TYPE_SECOND
-        @non_buffer.force_encoding("ASCII-8BIT")
-        @non_buffer_bytesize = @non_buffer.bytesize
-
-        @buffer = buffer # read
-        @buffer_bytesize = @buffer.size # Fluentd Buffer Chunk #size -> bytesize
-
-        @footer = MULTIPART_BOUNDARY_END.force_encoding("ASCII-8BIT")
-
-        @contents_bytesize = @non_buffer_bytesize + @buffer_bytesize
-        @total_bytesize = @contents_bytesize + MULTIPART_BOUNDARY_END.bytesize
-
-        @whole_data = nil
-
-        @counter = 0
-        @eof = false
-      end
-
-      # sample_body = <<EOF
-      # --xxx
-      # Content-Type: application/json; charset=UTF-8
-      #
-      # {
-      #   "configuration": {
-      #     "load": {
-      #       "sourceFormat": "<required for JSON files>",
-      #       "schema": {
-      #         "fields": [
-      #           {"name":"f1", "type":"STRING"},
-      #           {"name":"f2", "type":"INTEGER"}
-      #         ]
-      #       },
-      #       "destinationTable": {
-      #         "projectId": "projectId",
-      #         "datasetId": "datasetId",
-      #         "tableId": "tableId"
-      #       }
-      #     }
-      #   }
-      # }
-      # --xxx
-      # Content-Type: application/octet-stream
-      #
-      # <your data>
-      # --xxx--
-      # EOF
-      def rewind
-        @counter = 0
-        @eof = false
-      end
-
-      def eof?
-        @eof
-      end
-
-      def to_str
-        rewind
-        self.read # all data
-      end
-
-      def read(length=nil, outbuf="")
-        raise ArgumentError, "negative read length" if length && length < 0
-        return (length.nil? || length == 0) ? "" : nil if @eof
-        return outbuf if length == 0
-
-        # read all data
-        if length.nil? || length >= @total_bytesize
-          @whole_data ||= @buffer.read.force_encoding("ASCII-8BIT")
-
-          if @counter.zero?
-            outbuf.replace(@non_buffer)
-            outbuf << @whole_data
-            outbuf << @footer
-          elsif @counter < @non_buffer_bytesize
-            outbuf.replace(@non_buffer[ @counter .. -1 ])
-            outbuf << @whole_data
-            outbuf << @footer
-          elsif @counter < @contents_bytesize
-            outbuf.replace(@whole_data[ (@counter - @non_buffer_bytesize) .. -1 ])
-            outbuf << @footer
-          else
-            outbuf.replace(@footer[ (@counter - @contents_bytesize) .. -1 ])
-          end
-          @counter = @total_bytesize
-          @eof = true
-          return outbuf
-        end
-
-        # In ruby script level (non-ext module), we cannot prevent to change outbuf length or object re-assignment
-        outbuf.replace("")
-
-        # return first part (metadata)
-        if @counter < @non_buffer_bytesize
-          non_buffer_part = @non_buffer[@counter, length]
-          if non_buffer_part
-            outbuf << non_buffer_part
-            length -= non_buffer_part.bytesize
-            @counter += non_buffer_part.bytesize
-          end
-        end
-        return outbuf if length < 1
-
-        # return second part (buffer content)
-        if @counter < @contents_bytesize
-          @whole_data ||= @buffer.read.force_encoding("ASCII-8BIT")
-          buffer_part = @whole_data[@counter - @non_buffer_bytesize, length]
-          if buffer_part
-            outbuf << buffer_part
-            length -= buffer_part.bytesize
-            @counter += buffer_part.bytesize
-          end
-        end
-        return outbuf if length < 1
-
-        # return footer
-        footer_part = @footer[@counter - @contents_bytesize, length]
-        if footer_part
-          outbuf << footer_part
-          @counter += footer_part.bytesize
-          @eof = true if @counter >= @total_bytesize
-        end
-
-        outbuf
-      end
-    end
-  end
-end
data/test/test_load_request_body_wrapper.rb
DELETED
@@ -1,190 +0,0 @@
-# -*- coding: utf-8 -*-
-require 'helper'
-require 'json'
-require 'tempfile'
-
-class LoadRequestBodyWrapperTest < Test::Unit::TestCase
-  def content_alphabet(repeat)
-    (0...repeat).map{|i| "#{i}0123456789\n" }.join
-  end
-
-  def content_kana(repeat)
-    (0...repeat).map{|i| "#{i}あいうえおかきくけこ\n" }.join
-  end
-
-  def mem_chunk(repeat=10, kana=false)
-    content = kana ? content_kana(repeat) : content_alphabet(repeat)
-    Fluent::MemoryBufferChunk.new('bc_mem', content)
-  end
-
-  def file_chunk(repeat=10, kana=false)
-    content = kana ? content_kana(repeat) : content_alphabet(repeat)
-    tmpfile = Tempfile.new('fluent_bigquery_plugin_test')
-    buf = Fluent::FileBufferChunk.new('bc_mem', tmpfile.path, tmpfile.object_id)
-    buf << content
-    buf
-  end
-
-  def field_defs
-    [{"name" => "field1", "type" => "STRING"}, {"name" => "field2", "type" => "INTEGER"}]
-  end
-
-  def check_meta(blank, first, last)
-    assert_equal "", blank
-
-    header1, body1 = first.split("\n\n")
-    assert_equal "Content-Type: application/json; charset=UTF-8", header1
-    metadata = JSON.parse(body1)
-    assert_equal "<required for JSON files>", metadata["configuration"]["load"]["sourceFormat"]
-    assert_equal "field1", metadata["configuration"]["load"]["schema"]["fields"][0]["name"]
-    assert_equal "STRING", metadata["configuration"]["load"]["schema"]["fields"][0]["type"]
-    assert_equal "field2", metadata["configuration"]["load"]["schema"]["fields"][1]["name"]
-    assert_equal "INTEGER", metadata["configuration"]["load"]["schema"]["fields"][1]["type"]
-    assert_equal "pname1", metadata["configuration"]["load"]["destinationTable"]["projectId"]
-    assert_equal "dname1", metadata["configuration"]["load"]["destinationTable"]["datasetId"]
-    assert_equal "tname1", metadata["configuration"]["load"]["destinationTable"]["tableId"]
-
-    assert_equal "--\n", last
-  end
-
-  def check_ascii(data)
-    blank, first, second, last = data.split(/--xxx\n?/)
-
-    check_meta(blank, first, last)
-
-    header2, body2 = second.split("\n\n")
-    assert_equal "Content-Type: application/octet-stream", header2
-    i = 0
-    body2.each_line do |line|
-      assert_equal "#{i}0123456789\n", line
-      i += 1
-    end
-  end
-
-  def check_kana(data)
-    blank, first, second, last = data.split(/--xxx\n?/)
-
-    check_meta(blank, first, last)
-
-    header2, body2 = second.split("\n\n")
-    assert_equal "Content-Type: application/octet-stream", header2
-    i = 0
-    body2.each_line do |line|
-      assert_equal "#{i}あいうえおかきくけこ\n", line
-      i += 1
-    end
-  end
-
-  def setup
-    @klass = Fluent::BigQueryPlugin::LoadRequestBodyWrapper
-    self
-  end
-
-  def test_memory_buf
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(10))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(10))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_memory_buf2
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(2048, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_memory_buf3 # kana
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000, true))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_kana(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000, true))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(2048, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(10))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(10))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf2
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20480, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf3 # kana
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000, true))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_kana(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000, true))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20480, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-end