fluent-plugin-bigquery 0.2.15 → 0.2.16
- checksums.yaml +4 -4
- data/README.md +69 -18
- data/fluent-plugin-bigquery.gemspec +3 -2
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +148 -57
- data/test/helper.rb +0 -1
- data/test/plugin/test_out_bigquery.rb +109 -0
- metadata +19 -8
- data/lib/fluent/plugin/bigquery/load_request_body_wrapper.rb +0 -173
- data/test/test_load_request_body_wrapper.rb +0 -190
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6283655314f920c8d3f1bab8f387d96c6fe79da0
+  data.tar.gz: f1016e03203cf12c4c26ad62f1c3a05926423fa7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15e484f5df810cd5736711bd70df5a9e34950e10c77118a3b6097fba6f9c1efd9641ac515df547a15b2f9dac653deb3d5b2fa665541a47bf43dba750754d584e
+  data.tar.gz: 1bbcea1f4ec490c69028eca66032b6dca734231fedf431951618b1d5ad08a354395f357e31a028f3e60531ec15e831bbbb0d4f9a70bd48082a57562885564023
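checksums.yaml pins SHA1 and SHA512 digests for the gem's metadata.gz and data.tar.gz. A quick Ruby sketch of how such a digest can be reproduced locally (the file path here is hypothetical):

```ruby
# Illustrative only: reproduce a checksums.yaml-style digest for a file.
# "data.tar.gz" is a hypothetical path to the extracted gem component.
require "digest"

puts Digest::SHA1.file("data.tar.gz").hexdigest
puts Digest::SHA512.file("data.tar.gz").hexdigest
```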
data/README.md
CHANGED
@@ -5,7 +5,7 @@
 * insert data over streaming inserts
 * for continuous real-time insertions
 * https://developers.google.com/bigquery/streaming-data-into-bigquery#usecases
-*
+* load data
 * for data loading as batch jobs, for big amount of data
 * https://developers.google.com/bigquery/loading-data-into-bigquery
 
@@ -20,7 +20,7 @@ Configure insert specifications with target table schema, with your credentials.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   method insert # default
 
@@ -47,7 +47,7 @@ For high rate inserts over streaming inserts, you should specify flush intervals
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   method insert # default
 
@@ -106,6 +106,37 @@ Important options for high rate events are:
 See [Quota policy](https://cloud.google.com/bigquery/streaming-data-into-bigquery#quota)
 section in the Google BigQuery document.
 
+### Load
+```apache
+<match bigquery>
+  @type bigquery
+
+  method load
+  buffer_type file
+  buffer_path bigquery.*.buffer
+  flush_interval 1800
+  flush_at_shutdown true
+  try_flush_interval 1
+  utc
+
+  auth_method json_key
+  json_key json_key_path.json
+
+  time_format %s
+  time_field time
+
+  project yourproject_id
+  dataset yourdataset_id
+  auto_create_table true
+  table yourtable%{time_slice}
+  schema_path bq_schema.json
+</match>
+```
+
+I recommend using a file buffer and a long flush interval.
+
+__CAUTION: the default `flush_interval` is still `0.25` even when `method` is `load` in the current version.__
+
 ### Authentication
 
 There are two methods supported to fetch access token for the service account.
@@ -127,7 +158,7 @@ download its JSON key and deploy the key with fluentd.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method json_key
   json_key /home/username/.keys/00000000000000000000000000000000-jsonkey.json
@@ -144,7 +175,7 @@ You need to only include `private_key` and `client_email` key from JSON key file
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method json_key
   json_key {"private_key": "-----BEGIN PRIVATE KEY-----\n...", "client_email": "xxx@developer.gserviceaccount.com"}
@@ -165,7 +196,7 @@ Compute Engine instance, then you can configure fluentd like this.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   auth_method compute_engine
 
@@ -198,6 +229,7 @@ In this authentication method, the credentials returned are determined by the en
 
 ### Table id formatting
 
+#### strftime formatting
 `table` and `tables` options accept [Time#strftime](http://ruby-doc.org/core-1.9.3/Time.html#method-i-strftime)
 format to construct table ids.
 Table ids are formatted at runtime
@@ -208,7 +240,7 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -220,8 +252,11 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
 </match>
 ```
 
+#### record attribute formatting
 The format can be suffixed with attribute name.
 
+__NOTE: This feature is available only if `method` is `insert`, because it has a performance impact. Use `%{time_slice}` instead.__
+
 ```apache
 <match dummy>
   ...
@@ -233,23 +268,39 @@ The format can be suffixed with attribute name.
 If an attribute name is given, the time used for formatting is the value of each row.
 The time value should be a UNIX time.
 
+#### time_slice_key formatting
 Or, the options can use the `%{time_slice}` placeholder.
 `%{time_slice}` is replaced by the formatted time slice key at runtime.
 
 ```apache
 <match dummy>
-  type bigquery
-
+  @type bigquery
+
   ...
-
-  project yourproject_id
-  dataset yourdataset_id
   table accesslog%{time_slice}
-
   ...
 </match>
 ```
 
+#### record attribute value formatting
+Or, the `${attr_name}` placeholder is available to use an attribute value as part of the table id.
+`${attr_name}` is replaced by the string value of the attribute specified by `attr_name`.
+
+__NOTE: This feature is available only if `method` is `insert`.__
+
+```apache
+<match dummy>
+  ...
+  table accesslog_%Y_%m_${subdomain}
+  ...
+</match>
+```
+
+For example, if the value of the `subdomain` attribute is `"bq.fluent"`, the table id will be like "accesslog_2016_03_bqfluent".
+
+- any type of attribute is allowed, because the stringified value is used as the replacement.
+- acceptable characters are letters, digits and `_`; all other characters are removed.
+
 ### Dynamic table creating
 
 When `auto_create_table` is set to `true`, try to create the table using BigQuery API when insertion failed with code=404 "Not Found: Table ...".
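A standalone Ruby sketch of the `${attr_name}` replacement rule described above; `sketch_table_id` is a hypothetical helper, not the plugin's internal method, but it applies the same strip-non-word-characters logic:

```ruby
# Hypothetical helper mirroring the documented rule: the attribute value is
# stringified, characters other than letters, digits and "_" are removed,
# and the result is substituted before strftime formatting.
def sketch_table_id(table_format, time, record)
  resolved = table_format.gsub(/\$\{\s*(\w+)\s*\}/) do
    record[$1.to_sym].to_s.gsub(/[^\w]/, '')
  end
  time.strftime(resolved)
end

sketch_table_id('accesslog_%Y_%m_${subdomain}', Time.utc(2016, 3, 16), { subdomain: "bq.fluent" })
# => "accesslog_2016_03_bqfluent"
```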
@@ -259,7 +310,7 @@ NOTE: `auto_create_table` option cannot be used with `fetch_schema`. You should
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -283,7 +334,7 @@ you can also specify nested fields by prefixing their belonging record fields.
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -322,7 +373,7 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -339,7 +390,7 @@ The third method is to set `fetch_schema` to `true` to enable fetch a schema usi
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
@@ -363,7 +414,7 @@ You can set `insert_id_field` option to specify the field to use as `insertId` p
 
 ```apache
 <match dummy>
-  type bigquery
+  @type bigquery
 
   ...
 
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
   spec.description = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
   spec.summary = %q{Fluentd plugin to store data on Google BigQuery}
   spec.homepage = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
-  spec.license = "
+  spec.license = "Apache-2.0"
 
   spec.files = `git ls-files`.split($/)
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
@@ -23,9 +23,10 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "test-unit", "~> 3.0.2"
   spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
 
-  spec.add_runtime_dependency "google-api-client", "~> 0.9.
+  spec.add_runtime_dependency "google-api-client", "~> 0.9.3"
   spec.add_runtime_dependency "googleauth", ">= 0.5.0"
   spec.add_runtime_dependency "multi_json"
+  spec.add_runtime_dependency "activesupport", ">= 3.2"
   spec.add_runtime_dependency "fluentd"
   spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
   spec.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
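The new `activesupport` runtime dependency presumably backs `Hash#deep_symbolize_keys`, which the reworked write path in `out_bigquery.rb` (below) calls on each buffered row. A minimal illustration, assuming a reasonably current ActiveSupport:

```ruby
# Illustrative only: deep_symbolize_keys comes from ActiveSupport's Hash
# core extensions, which is presumably why activesupport >= 3.2 was added
# as a runtime dependency.
require "active_support/core_ext/hash"

{ "json" => { "uuid" => "abc" } }.deep_symbolize_keys
# => {:json=>{:uuid=>"abc"}}
```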
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -92,7 +92,7 @@ module Fluent
 
     config_param :insert_id_field, :string, default: nil
 
-    config_param :method, :string, default: 'insert' # or 'load'
+    config_param :method, :string, default: 'insert' # or 'load'
 
     config_param :load_size_limit, :integer, default: 1000**4 # < 1TB (1024^4) # TODO: not implemented now
     ### method: 'load'
@@ -150,6 +150,14 @@ module Fluent
     def configure(conf)
       super
 
+      if @method == "insert"
+        extend(InsertImplementation)
+      elsif @method == "load"
+        extend(LoadImplementation)
+      else
+        raise Fluent::ConfigError, "'method' must be 'insert' or 'load'"
+      end
+
       case @auth_method
       when 'private_key'
         unless @email && @private_key_path
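The block above picks one implementation module per output instance at configure time. A minimal standalone sketch of that `extend`-based dispatch (the class and module names here are illustrative, not the plugin's own):

```ruby
# Illustrative sketch: a shared #write calls the method-specific #_write
# provided by whichever strategy module the instance extended itself with.
module InsertLike
  def _write(chunk)
    "streaming insert of #{chunk.size} rows"
  end
end

module LoadLike
  def _write(chunk)
    "load job for #{chunk.size} rows"
  end
end

class SketchOutput
  def configure(method)
    case method
    when "insert" then extend(InsertLike)
    when "load"   then extend(LoadLike)
    else raise ArgumentError, "'method' must be 'insert' or 'load'"
    end
  end

  def write(chunk)
    _write(chunk)
  end
end

out = SketchOutput.new
out.configure("load")
out.write(%w[row1 row2])  # => "load job for 2 rows"
```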
@@ -286,6 +294,12 @@ module Fluent
       else
         current_time
       end
+      if row && format =~ /\$\{/
+        json = row[:json]
+        format.gsub!(/\$\{\s*(\w+)\s*\}/) do |m|
+          row[:json][$1.to_sym].to_s.gsub(/[^\w]/, '')
+        end
+      end
       table_id = time.strftime(format)
 
       if chunk
@@ -321,29 +335,6 @@ module Fluent
         raise "failed to create table in bigquery" # TODO: error class
       end
 
-      def insert(table_id, rows)
-        client.insert_all_table_data(@project, @dataset, table_id, {
-          rows: rows
-        }, {})
-      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
-        # api_error? -> client cache clear
-        @cached_client = nil
-
-        message = e.message
-        if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ message.to_s
-          # Table Not Found: Auto Create Table
-          create_table(table_id)
-          raise "table created. send rows next time."
-        end
-        log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
-        raise "failed to insert into bigquery" # TODO: error class
-      end
-
-      def load
-        # https://developers.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest
-        raise NotImplementedError # TODO
-      end
-
       def replace_record_key(record)
         new_record = {}
         record.each do |key, _|
@@ -366,44 +357,13 @@ module Fluent
         record
       end
 
-      def format(tag, time, record)
-        buf = ''
-
-        if @replace_record_key
-          record = replace_record_key(record)
-        end
-
-        if @convert_hash_to_json
-          record = convert_hash_to_json(record)
-        end
-
-        row = @fields.format(@add_time_field.call(record, time))
-        unless row.empty?
-          row = {"json" => row}
-          row['insert_id'] = @get_insert_id.call(record) if @get_insert_id
-          buf << row.to_msgpack
-        end
-        buf
-      end
-
       def write(chunk)
-
-        chunk.msgpack_each do |row_object|
-          # TODO: row size limit
-          rows << row_object.deep_symbolize_keys
-        end
-
-        # TODO: method
-
-        insert_table_format = @tables_mutex.synchronize do
+        table_id_format = @tables_mutex.synchronize do
           t = @tables_queue.shift
           @tables_queue.push t
           t
         end
-
-        rows.group_by {|row| generate_table_id(insert_table_format, Time.at(Fluent::Engine.now), row, chunk) }.each do |table_id, rows|
-          insert(table_id, rows)
-        end
+        _write(chunk, table_id_format)
       end
 
       def fetch_schema
@@ -422,6 +382,137 @@ module Fluent
         raise "failed to fetch schema from bigquery" # TODO: error class
       end
 
+      module InsertImplementation
+        def format(tag, time, record)
+          buf = ''
+
+          if @replace_record_key
+            record = replace_record_key(record)
+          end
+
+          if @convert_hash_to_json
+            record = convert_hash_to_json(record)
+          end
+
+          row = @fields.format(@add_time_field.call(record, time))
+          unless row.empty?
+            row = {"json" => row}
+            row['insert_id'] = @get_insert_id.call(record) if @get_insert_id
+            buf << row.to_msgpack
+          end
+          buf
+        end
+
+        def _write(chunk, table_format)
+          rows = []
+          chunk.msgpack_each do |row_object|
+            # TODO: row size limit
+            rows << row_object.deep_symbolize_keys
+          end
+
+          rows.group_by {|row| generate_table_id(table_format, Time.at(Fluent::Engine.now), row, chunk) }.each do |table_id, group|
+            insert(table_id, group)
+          end
+        end
+
+        def insert(table_id, rows)
+          client.insert_all_table_data(@project, @dataset, table_id, {
+            rows: rows
+          }, {})
+        rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
+          # api_error? -> client cache clear
+          @cached_client = nil
+
+          message = e.message
+          if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ message.to_s
+            # Table Not Found: Auto Create Table
+            create_table(table_id)
+            raise "table created. send rows next time."
+          end
+          log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
+          raise "failed to insert into bigquery" # TODO: error class
+        end
+      end
+
+      module LoadImplementation
+        def format(tag, time, record)
+          buf = ''
+
+          if @replace_record_key
+            record = replace_record_key(record)
+          end
+          row = @fields.format(@add_time_field.call(record, time))
+          unless row.empty?
+            buf << MultiJson.dump(row) + "\n"
+          end
+          buf
+        end
+
+        def _write(chunk, table_id_format)
+          table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil, chunk)
+          load(chunk, table_id)
+        end
+
+        def load(chunk, table_id)
+          res = nil
+          create_upload_source(chunk) do |upload_source|
+            res = client.insert_job(@project, {
+              configuration: {
+                load: {
+                  destination_table: {
+                    project_id: @project,
+                    dataset_id: @dataset,
+                    table_id: table_id,
+                  },
+                  schema: {
+                    fields: @fields.to_a,
+                  },
+                  write_disposition: "WRITE_APPEND",
+                  source_format: "NEWLINE_DELIMITED_JSON"
+                }
+              }
+            }, {upload_source: upload_source, content_type: "application/octet-stream"})
+          end
+          wait_load(res, table_id)
+        end
+
+        private
+
+        def wait_load(res, table_id)
+          wait_interval = 10
+          _response = res
+          until _response.status.state == "DONE"
+            log.debug "wait for load job finish", state: _response.status.state
+            sleep wait_interval
+            _response = client.get_job(@project, _response.job_reference.job_id)
+          end
+
+          if _response.status.error_result
+            log.error "job.insert API", project_id: @project, dataset: @dataset, table: table_id, message: _response.status.error_result.message
+            raise "failed to load into bigquery"
+          end
+
+          log.debug "finish load job", state: _response.status.state
+        end
+
+        def create_upload_source(chunk)
+          chunk_is_file = @buffer_type == 'file'
+          if chunk_is_file
+            File.open(chunk.path) do |file|
+              yield file
+            end
+          else
+            Tempfile.open("chunk-tmp") do |file|
+              file.binmode
+              chunk.write_to(file)
+              file.sync
+              file.rewind
+              yield file
+            end
+          end
+        end
+      end
+
       class FieldSchema
         def initialize(name, mode = :nullable)
           unless [:nullable, :required, :repeated].include?(mode)
data/test/helper.rb
CHANGED

data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -710,6 +710,35 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MessagePack.unpack(buf)
   end
 
+  def test_format_for_load
+    now = Time.now
+    input = [
+      now,
+      {
+        "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
+      }
+    ]
+    expected = MultiJson.dump({
+      "uuid" => "9ABFF756-0267-4247-847F-0895B65F0938",
+    }) + "\n"
+
+    driver = create_driver(<<-CONFIG)
+      method load
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      field_string uuid
+    CONFIG
+    driver.instance.start
+    buf = driver.instance.format_stream("my.tag", [input])
+    driver.instance.shutdown
+
+    assert_equal expected, buf
+  end
+
   def test_empty_value_in_required
     now = Time.now
     input = [
@@ -857,6 +886,66 @@ class BigQueryOutputTest < Test::Unit::TestCase
     driver.instance.shutdown
   end
 
+  def test_write_for_load
+    schema_path = File.join(File.dirname(__FILE__), "testdata", "sudo.schema")
+    entry = {a: "b"}, {b: "c"}
+    driver = create_driver(<<-CONFIG)
+      method load
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field time
+
+      schema_path #{schema_path}
+      field_integer time
+    CONFIG
+    schema_fields = MultiJson.load(File.read(schema_path)).map(&:deep_symbolize_keys).tap do |h|
+      h[0][:type] = "INTEGER"
+      h[0][:mode] = "NULLABLE"
+    end
+
+    chunk = Fluent::MemoryBufferChunk.new("my.tag")
+    io = StringIO.new("hello")
+    mock(driver.instance).create_upload_source(chunk).yields(io)
+    mock_client(driver) do |expect|
+      expect.insert_job('yourproject_id', {
+        configuration: {
+          load: {
+            destination_table: {
+              project_id: 'yourproject_id',
+              dataset_id: 'yourdataset_id',
+              table_id: 'foo',
+            },
+            schema: {
+              fields: schema_fields,
+            },
+            write_disposition: "WRITE_APPEND",
+            source_format: "NEWLINE_DELIMITED_JSON"
+          }
+        }
+      }, {upload_source: io, content_type: "application/octet-stream"}) {
+        s = stub!
+        status_stub = stub!
+        s.status { status_stub }
+        status_stub.state { "DONE" }
+        status_stub.error_result { nil }
+        s
+      }
+    end
+
+    entry.each do |e|
+      chunk << MultiJson.dump(e) + "\n"
+    end
+
+    driver.instance.start
+    driver.instance.write(chunk)
+    driver.instance.shutdown
+  end
+
   def test_write_with_row_based_table_id_formatting
     entry = [
       {json: {a: "b", created_at: Time.local(2014,8,20,9,0,0).to_i}},
@@ -935,6 +1024,26 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal 'foo_20140811', table_id
   end
 
+  def test_generate_table_id_with_attribute_replacement
+    driver = create_driver
+    table_id_format = 'foo_%Y_%m_%d_${baz}'
+    current_time = Time.now
+    time = Time.local(2014, 8, 11, 21, 20, 56)
+    [
+      [ { baz: 1234 },         'foo_2014_08_11_1234' ],
+      [ { baz: 'piyo' },       'foo_2014_08_11_piyo' ],
+      [ { baz: true },         'foo_2014_08_11_true' ],
+      [ { baz: nil },          'foo_2014_08_11_' ],
+      [ { baz: '' },           'foo_2014_08_11_' ],
+      [ { baz: "_X-Y.Z !\n" }, 'foo_2014_08_11__XYZ' ],
+      [ { baz: { xyz: 1 } },   'foo_2014_08_11_xyz1' ],
+    ].each do |attrs, expected|
+      row = { json: { created_at: Time.local(2014,8,10,21,20,57).to_i }.merge(attrs) }
+      table_id = driver.instance.generate_table_id(table_id_format, time, row)
+      assert_equal expected, table_id
+    end
+  end
+
   def test_auto_create_table_by_bigquery_api
     now = Time.now
     message = {
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.2.15
+  version: 0.2.16
 platform: ruby
 authors:
 - Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-03-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
|
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.
+        version: 0.9.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.
+        version: 0.9.3
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +108,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.2'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.2'
 - !ruby/object:Gem::Dependency
   name: fluentd
   requirement: !ruby/object:Gem::Requirement
@@ -193,7 +207,6 @@ files:
 - README.md
 - Rakefile
 - fluent-plugin-bigquery.gemspec
-- lib/fluent/plugin/bigquery/load_request_body_wrapper.rb
 - lib/fluent/plugin/bigquery/version.rb
 - lib/fluent/plugin/out_bigquery.rb
 - test/helper.rb
@@ -201,10 +214,9 @@ files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/json_key.json
 - test/plugin/testdata/sudo.schema
-- test/test_load_request_body_wrapper.rb
 homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
--
+- Apache-2.0
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -232,4 +244,3 @@ test_files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/json_key.json
 - test/plugin/testdata/sudo.schema
-- test/test_load_request_body_wrapper.rb
data/lib/fluent/plugin/bigquery/load_request_body_wrapper.rb
DELETED
@@ -1,173 +0,0 @@
-module Fluent
-  module BigQueryPlugin
-    class LoadRequestBodyWrapper
-      # body can be a instance of IO (#rewind, #read, #to_str)
-      # http://rubydoc.info/github/google/google-api-ruby-client/Google/APIClient/Request#body-instance_method
-
-      # http://rubydoc.info/github/google/google-api-ruby-client/Google/APIClient#execute-instance_method
-      # (Google::APIClient::Method) api_method: The method object or the RPC name of the method being executed.
-      # (Hash, Array) parameters: The parameters to send to the method.
-      # (String) body: The body of the request.
-      # (Hash, Array) headers: The HTTP headers for the request.
-      # (Hash) options: A set of options for the request, of which:
-      # (#generate_authenticated_request) :authorization (default: true)
-      # - The authorization mechanism for the response. Used only if :authenticated is true.
-      # (TrueClass, FalseClass) :authenticated (default: true)
-      # - true if the request must be signed or somehow authenticated, false otherwise.
-      # (TrueClass, FalseClass) :gzip (default: true) - true if gzip enabled, false otherwise.
-
-      # https://developers.google.com/bigquery/loading-data-into-bigquery#loaddatapostrequest
-
-      JSON_PRETTY_DUMP = JSON::State.new(space: " ", indent:" ", object_nl:"\n", array_nl:"\n")
-
-      CONTENT_TYPE_FIRST = "Content-Type: application/json; charset=UTF-8\n\n"
-      CONTENT_TYPE_SECOND = "Content-Type: application/octet-stream\n\n"
-
-      MULTIPART_BOUNDARY = "--xxx\n"
-      MULTIPART_BOUNDARY_END = "--xxx--\n"
-
-      def initialize(project_id, dataset_id, table_id, field_defs, buffer)
-        @metadata = {
-          configuration: {
-            load: {
-              sourceFormat: "<required for JSON files>",
-              schema: {
-                fields: field_defs
-              },
-              destinationTable: {
-                projectId: project_id,
-                datasetId: dataset_id,
-                tableId: table_id
-              }
-            }
-          }
-        }
-
-        @non_buffer = MULTIPART_BOUNDARY + CONTENT_TYPE_FIRST + @metadata.to_json(JSON_PRETTY_DUMP) + "\n" +
-                      MULTIPART_BOUNDARY + CONTENT_TYPE_SECOND
-        @non_buffer.force_encoding("ASCII-8BIT")
-        @non_buffer_bytesize = @non_buffer.bytesize
-
-        @buffer = buffer # read
-        @buffer_bytesize = @buffer.size # Fluentd Buffer Chunk #size -> bytesize
-
-        @footer = MULTIPART_BOUNDARY_END.force_encoding("ASCII-8BIT")
-
-        @contents_bytesize = @non_buffer_bytesize + @buffer_bytesize
-        @total_bytesize = @contents_bytesize + MULTIPART_BOUNDARY_END.bytesize
-
-        @whole_data = nil
-
-        @counter = 0
-        @eof = false
-      end
-
-      # sample_body = <<EOF
-      # --xxx
-      # Content-Type: application/json; charset=UTF-8
-      #
-      # {
-      #   "configuration": {
-      #     "load": {
-      #       "sourceFormat": "<required for JSON files>",
-      #       "schema": {
-      #         "fields": [
-      #           {"name":"f1", "type":"STRING"},
-      #           {"name":"f2", "type":"INTEGER"}
-      #         ]
-      #       },
-      #       "destinationTable": {
-      #         "projectId": "projectId",
-      #         "datasetId": "datasetId",
-      #         "tableId": "tableId"
-      #       }
-      #     }
-      #   }
-      # }
-      # --xxx
-      # Content-Type: application/octet-stream
-      #
-      # <your data>
-      # --xxx--
-      # EOF
-      def rewind
-        @counter = 0
-        @eof = false
-      end
-
-      def eof?
-        @eof
-      end
-
-      def to_str
-        rewind
-        self.read # all data
-      end
-
-      def read(length=nil, outbuf="")
-        raise ArgumentError, "negative read length" if length && length < 0
-        return (length.nil? || length == 0) ? "" : nil if @eof
-        return outbuf if length == 0
-
-        # read all data
-        if length.nil? || length >= @total_bytesize
-          @whole_data ||= @buffer.read.force_encoding("ASCII-8BIT")
-
-          if @counter.zero?
-            outbuf.replace(@non_buffer)
-            outbuf << @whole_data
-            outbuf << @footer
-          elsif @counter < @non_buffer_bytesize
-            outbuf.replace(@non_buffer[ @counter .. -1 ])
-            outbuf << @whole_data
-            outbuf << @footer
-          elsif @counter < @contents_bytesize
-            outbuf.replace(@whole_data[ (@counter - @non_buffer_bytesize) .. -1 ])
-            outbuf << @footer
-          else
-            outbuf.replace(@footer[ (@counter - @contents_bytesize) .. -1 ])
-          end
-          @counter = @total_bytesize
-          @eof = true
-          return outbuf
-        end
-
-        # In ruby script level (non-ext module), we cannot prevent to change outbuf length or object re-assignment
-        outbuf.replace("")
-
-        # return first part (metadata)
-        if @counter < @non_buffer_bytesize
-          non_buffer_part = @non_buffer[@counter, length]
-          if non_buffer_part
-            outbuf << non_buffer_part
-            length -= non_buffer_part.bytesize
-            @counter += non_buffer_part.bytesize
-          end
-        end
-        return outbuf if length < 1
-
-        # return second part (buffer content)
-        if @counter < @contents_bytesize
-          @whole_data ||= @buffer.read.force_encoding("ASCII-8BIT")
-          buffer_part = @whole_data[@counter - @non_buffer_bytesize, length]
-          if buffer_part
-            outbuf << buffer_part
-            length -= buffer_part.bytesize
-            @counter += buffer_part.bytesize
-          end
-        end
-        return outbuf if length < 1
-
-        # return footer
-        footer_part = @footer[@counter - @contents_bytesize, length]
-        if footer_part
-          outbuf << footer_part
-          @counter += footer_part.bytesize
-          @eof = true if @counter >= @total_bytesize
-        end
-
-        outbuf
-      end
-    end
-  end
-end
data/test/test_load_request_body_wrapper.rb
DELETED
@@ -1,190 +0,0 @@
-# -*- coding: utf-8 -*-
-require 'helper'
-require 'json'
-require 'tempfile'
-
-class LoadRequestBodyWrapperTest < Test::Unit::TestCase
-  def content_alphabet(repeat)
-    (0...repeat).map{|i| "#{i}0123456789\n" }.join
-  end
-
-  def content_kana(repeat)
-    (0...repeat).map{|i| "#{i}あいうえおかきくけこ\n" }.join
-  end
-
-  def mem_chunk(repeat=10, kana=false)
-    content = kana ? content_kana(repeat) : content_alphabet(repeat)
-    Fluent::MemoryBufferChunk.new('bc_mem', content)
-  end
-
-  def file_chunk(repeat=10, kana=false)
-    content = kana ? content_kana(repeat) : content_alphabet(repeat)
-    tmpfile = Tempfile.new('fluent_bigquery_plugin_test')
-    buf = Fluent::FileBufferChunk.new('bc_mem', tmpfile.path, tmpfile.object_id)
-    buf << content
-    buf
-  end
-
-  def field_defs
-    [{"name" => "field1", "type" => "STRING"}, {"name" => "field2", "type" => "INTEGER"}]
-  end
-
-  def check_meta(blank, first, last)
-    assert_equal "", blank
-
-    header1, body1 = first.split("\n\n")
-    assert_equal "Content-Type: application/json; charset=UTF-8", header1
-    metadata = JSON.parse(body1)
-    assert_equal "<required for JSON files>", metadata["configuration"]["load"]["sourceFormat"]
-    assert_equal "field1", metadata["configuration"]["load"]["schema"]["fields"][0]["name"]
-    assert_equal "STRING", metadata["configuration"]["load"]["schema"]["fields"][0]["type"]
-    assert_equal "field2", metadata["configuration"]["load"]["schema"]["fields"][1]["name"]
-    assert_equal "INTEGER", metadata["configuration"]["load"]["schema"]["fields"][1]["type"]
-    assert_equal "pname1", metadata["configuration"]["load"]["destinationTable"]["projectId"]
-    assert_equal "dname1", metadata["configuration"]["load"]["destinationTable"]["datasetId"]
-    assert_equal "tname1", metadata["configuration"]["load"]["destinationTable"]["tableId"]
-
-    assert_equal "--\n", last
-  end
-
-  def check_ascii(data)
-    blank, first, second, last = data.split(/--xxx\n?/)
-
-    check_meta(blank, first, last)
-
-    header2, body2 = second.split("\n\n")
-    assert_equal "Content-Type: application/octet-stream", header2
-    i = 0
-    body2.each_line do |line|
-      assert_equal "#{i}0123456789\n", line
-      i += 1
-    end
-  end
-
-  def check_kana(data)
-    blank, first, second, last = data.split(/--xxx\n?/)
-
-    check_meta(blank, first, last)
-
-    header2, body2 = second.split("\n\n")
-    assert_equal "Content-Type: application/octet-stream", header2
-    i = 0
-    body2.each_line do |line|
-      assert_equal "#{i}あいうえおかきくけこ\n", line
-      i += 1
-    end
-  end
-
-  def setup
-    @klass = Fluent::BigQueryPlugin::LoadRequestBodyWrapper
-    self
-  end
-
-  def test_memory_buf
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(10))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(10))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_memory_buf2
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(2048, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_memory_buf3 # kana
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000, true))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_kana(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), mem_chunk(100000, true))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(2048, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(10))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(10))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf2
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_ascii(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20480, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-
-  def test_file_buf3 # kana
-    d1 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000, true))
-    data1 = d1.read.force_encoding("UTF-8")
-    check_kana(data1)
-
-    d2 = @klass.new('pname1', 'dname1', 'tname1', field_defs(), file_chunk(100000, true))
-    data2 = ""
-    while !d2.eof? do
-      buf = " "
-      objid = buf.object_id
-      data2 << d2.read(20480, buf)
-      assert_equal objid, buf.object_id
-    end
-    data2.force_encoding("UTF-8")
-
-    assert_equal data1.size, data2.size
-  end
-end