feedx 0.11.0 → 0.12.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.github/workflows/test.yml +60 -0
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +15 -4
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +80 -50
  8. data/Makefile +6 -6
  9. data/README.md +1 -1
  10. data/compression.go +18 -0
  11. data/compression_test.go +14 -2
  12. data/consumer_test.go +2 -2
  13. data/ext/parquet/decoder.go +170 -0
  14. data/ext/parquet/decoder_test.go +88 -0
  15. data/ext/parquet/go.mod +10 -0
  16. data/ext/parquet/go.sum +152 -0
  17. data/ext/parquet/parquet.go +78 -0
  18. data/ext/parquet/parquet_test.go +28 -0
  19. data/ext/parquet/reader.go +89 -0
  20. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  21. data/ext/parquet/types.go +51 -0
  22. data/feedx.gemspec +5 -6
  23. data/feedx_test.go +2 -2
  24. data/format.go +45 -15
  25. data/format_test.go +4 -2
  26. data/go.mod +10 -5
  27. data/go.sum +90 -25
  28. data/internal/testdata/testdata.pb.go +176 -77
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression/abstract.rb +2 -2
  32. data/lib/feedx/compression/gzip.rb +2 -2
  33. data/lib/feedx/compression/none.rb +2 -2
  34. data/lib/feedx/consumer.rb +15 -9
  35. data/lib/feedx/format.rb +4 -1
  36. data/lib/feedx/producer.rb +27 -22
  37. data/lib/feedx/stream.rb +30 -13
  38. data/producer_test.go +2 -2
  39. data/reader_test.go +2 -2
  40. data/spec/feedx/cache/memory_spec.rb +2 -2
  41. data/spec/feedx/cache/value_spec.rb +1 -1
  42. data/spec/feedx/compression/gzip_spec.rb +1 -1
  43. data/spec/feedx/compression/none_spec.rb +1 -1
  44. data/spec/feedx/compression_spec.rb +2 -2
  45. data/spec/feedx/consumer_spec.rb +5 -4
  46. data/spec/feedx/format/abstract_spec.rb +2 -1
  47. data/spec/feedx/format/json_spec.rb +6 -6
  48. data/spec/feedx/format/parquet_spec.rb +1 -1
  49. data/spec/feedx/format/protobuf_spec.rb +1 -1
  50. data/spec/feedx/format_spec.rb +2 -2
  51. data/spec/feedx/producer_spec.rb +15 -8
  52. data/spec/feedx/stream_spec.rb +36 -18
  53. data/writer_test.go +2 -2
  54. metadata +24 -23
  55. data/.travis.yml +0 -24
@@ -4,7 +4,7 @@ RSpec.describe Feedx::Format::Protobuf do
4
4
  let(:wio) { StringIO.new }
5
5
  let(:rio) { StringIO.open(wio.string) }
6
6
 
7
- it 'should encode/decode' do
7
+ it 'encode/decodes' do
8
8
  subject.encoder wio do |enc|
9
9
  enc.encode(Feedx::TestCase::Model.new('X'))
10
10
  enc.encode(Feedx::TestCase::Model.new('Y'))
@@ -1,13 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
- it 'should resolve' do
4
+ it 'resolves' do
5
5
  expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
6
  expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
- it 'should detect' do
10
+ it 'detects' do
11
11
  expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
12
  expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
13
  expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
@@ -6,39 +6,40 @@ RSpec.describe Feedx::Producer do
6
6
  end
7
7
 
8
8
  let(:bucket) { BFS::Bucket::InMem.new }
9
+
9
10
  before { allow(BFS).to receive(:resolve).and_return(bucket) }
10
11
 
11
- it 'should reject invalid inputs' do
12
+ it 'rejects invalid inputs' do
12
13
  expect do
13
14
  described_class.perform 'mock:///dir/file.txt', enum: enumerable
14
15
  end.to raise_error(/unable to detect format/)
15
16
  end
16
17
 
17
- it 'should push compressed JSON' do
18
+ it 'pushes compressed JSON' do
18
19
  size = described_class.perform 'mock:///dir/file.jsonz', enum: enumerable
19
20
  expect(size).to be_within(20).of(166)
20
21
  expect(bucket.info('dir/file.jsonz').size).to eq(size)
21
22
  end
22
23
 
23
- it 'should push plain JSON' do
24
+ it 'pushes plain JSON' do
24
25
  size = described_class.perform 'mock:///dir/file.json', enum: enumerable
25
26
  expect(size).to eq(15900)
26
27
  expect(bucket.info('dir/file.json').size).to eq(size)
27
28
  end
28
29
 
29
- it 'should push compressed PB' do
30
+ it 'pushes compressed PB' do
30
31
  size = described_class.perform 'mock:///dir/file.pbz', enum: enumerable
31
32
  expect(size).to be_within(20).of(41)
32
33
  expect(bucket.info('dir/file.pbz').size).to eq(size)
33
34
  end
34
35
 
35
- it 'should push plain PB' do
36
+ it 'pushes plain PB' do
36
37
  size = described_class.perform 'mock:///dir/file.pb', enum: enumerable
37
38
  expect(size).to eq(1200)
38
39
  expect(bucket.info('dir/file.pb').size).to eq(size)
39
40
  end
40
41
 
41
- it 'should support factories' do
42
+ it 'supports factories' do
42
43
  size = described_class.perform('mock:///dir/file.json') do
43
44
  enumerable
44
45
  end
@@ -46,12 +47,12 @@ RSpec.describe Feedx::Producer do
46
47
  expect(bucket.info('dir/file.json').size).to eq(size)
47
48
  end
48
49
 
49
- it 'should support last-modified' do
50
+ it 'supports last-modified' do
50
51
  described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
51
52
  expect(bucket.info('dir/file.json').metadata).to eq('X-Feedx-Last-Modified' => '1515151515000')
52
53
  end
53
54
 
54
- it 'should perform conditionally' do
55
+ it 'performs conditionally' do
55
56
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
56
57
  expect(size).to eq(15900)
57
58
 
@@ -64,4 +65,10 @@ RSpec.describe Feedx::Producer do
64
65
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
65
66
  expect(size).to eq(15900)
66
67
  end
68
+
69
+ it 'accepts downstream options' do
70
+ expect do
71
+ described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
72
+ end.not_to raise_error
73
+ end
67
74
  end
@@ -1,19 +1,22 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Stream do
4
+ subject { described_class.new('mock:///dir/file.json') }
5
+
4
6
  let(:bucket) { BFS::Bucket::InMem.new }
7
+ let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
8
+
5
9
  before { allow(BFS).to receive(:resolve).and_return(bucket) }
6
10
 
7
- subject { described_class.new('mock:///dir/file.json') }
8
- let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
11
+ after { subject.close; compressed.close }
9
12
 
10
- it 'should reject invalid inputs' do
13
+ it 'rejects invalid inputs' do
11
14
  expect do
12
15
  described_class.new('mock:///dir/file.txt')
13
16
  end.to raise_error(/unable to detect format/)
14
17
  end
15
18
 
16
- it 'should accept custom formats' do
19
+ it 'accepts custom formats' do
17
20
  format = Class.new do
18
21
  def encoder(io, &block)
19
22
  Feedx::Format::JSON::Encoder.open(io, &block)
@@ -24,27 +27,30 @@ RSpec.describe Feedx::Stream do
24
27
  end
25
28
  end
26
29
 
27
- stream = described_class.new('mock:///dir/file.txt', format: format.new)
28
- stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
30
+ result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
31
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
32
+ 21
33
+ end
34
+ expect(result).to eq(21)
29
35
 
30
- expect(bucket.read('dir/file.txt')).to eq(
31
- %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
32
- )
36
+ expect(bucket.read('dir/file.txt')).to eq(<<~JSON)
37
+ {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
38
+ JSON
33
39
  end
34
40
 
35
- it 'should encode' do
41
+ it 'encodes' do
36
42
  subject.create do |s|
37
43
  s.encode(Feedx::TestCase::Model.new('X'))
38
44
  s.encode(Feedx::TestCase::Model.new('Y'))
39
45
  end
40
46
 
41
- expect(bucket.read('dir/file.json')).to eq(
42
- %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
43
- %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
44
- )
47
+ expect(bucket.read('dir/file.json')).to eq(<<~JSON)
48
+ {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
49
+ {"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
50
+ JSON
45
51
  end
46
52
 
47
- it 'should encode compressed' do
53
+ it 'encodes compressed' do
48
54
  compressed.create do |s|
49
55
  100.times do
50
56
  s.encode(Feedx::TestCase::Model.new('X'))
@@ -54,14 +60,26 @@ RSpec.describe Feedx::Stream do
54
60
  expect(bucket.info('dir/file.json.gz').size).to be_within(10).of(108)
55
61
  end
56
62
 
57
- it 'should encode with create options' do
63
+ it 'encodes with create options' do
58
64
  subject.create metadata: { 'x' => '5' } do |s|
59
65
  s.encode(Feedx::TestCase::Model.new('X'))
60
66
  end
61
67
  expect(bucket.info('dir/file.json').metadata).to eq('X' => '5')
62
68
  end
63
69
 
64
- it 'should decode' do
70
+ it 'aborts encode on errors (if compressed)' do
71
+ stop = RuntimeError.new('STOP')
72
+ expect do
73
+ compressed.create do |s|
74
+ s.encode(Feedx::TestCase::Model.new('X'))
75
+ raise stop
76
+ end
77
+ end.to raise_error(stop)
78
+
79
+ expect(bucket.ls('**').to_a).to be_empty
80
+ end
81
+
82
+ it 'decodes' do
65
83
  subject.create do |s|
66
84
  s.encode(Feedx::TestCase::Model.new('X'))
67
85
  s.encode(Feedx::TestCase::Model.new('Y'))
@@ -75,7 +93,7 @@ RSpec.describe Feedx::Stream do
75
93
  end
76
94
  end
77
95
 
78
- it 'should decode compressed' do
96
+ it 'decodes compressed' do
79
97
  compressed.create do |s|
80
98
  s.encode(Feedx::TestCase::Model.new('X'))
81
99
  s.encode(Feedx::TestCase::Model.new('Y'))
data/writer_test.go CHANGED
@@ -7,8 +7,8 @@ import (
7
7
 
8
8
  "github.com/bsm/bfs"
9
9
  "github.com/bsm/feedx"
10
- . "github.com/onsi/ginkgo"
11
- . "github.com/onsi/gomega"
10
+ . "github.com/bsm/ginkgo"
11
+ . "github.com/bsm/gomega"
12
12
  )
13
13
 
14
14
  var _ = Describe("Writer", func() {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-16 00:00:00.000000000 Z
11
+ date: 2021-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.5.0
19
+ version: 0.8.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.5.0
26
+ version: 0.8.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -72,30 +72,22 @@ dependencies:
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
75
+ version: '3.0'
76
+ - - "<"
81
77
  - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: rspec
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
78
+ version: '4.0'
90
79
  type: :development
91
80
  prerelease: false
92
81
  version_requirements: !ruby/object:Gem::Requirement
93
82
  requirements:
94
83
  - - ">="
95
84
  - !ruby/object:Gem::Version
96
- version: '0'
85
+ version: '3.0'
86
+ - - "<"
87
+ - !ruby/object:Gem::Version
88
+ version: '4.0'
97
89
  - !ruby/object:Gem::Dependency
98
- name: rubocop
90
+ name: rspec
99
91
  requirement: !ruby/object:Gem::Requirement
100
92
  requirements:
101
93
  - - ">="
@@ -109,7 +101,7 @@ dependencies:
109
101
  - !ruby/object:Gem::Version
110
102
  version: '0'
111
103
  - !ruby/object:Gem::Dependency
112
- name: rubocop-performance
104
+ name: rubocop-bsm
113
105
  requirement: !ruby/object:Gem::Requirement
114
106
  requirements:
115
107
  - - ">="
@@ -130,9 +122,9 @@ extensions: []
130
122
  extra_rdoc_files: []
131
123
  files:
132
124
  - ".editorconfig"
125
+ - ".github/workflows/test.yml"
133
126
  - ".gitignore"
134
127
  - ".rubocop.yml"
135
- - ".travis.yml"
136
128
  - Gemfile
137
129
  - Gemfile.lock
138
130
  - LICENSE
@@ -143,6 +135,15 @@ files:
143
135
  - compression_test.go
144
136
  - consumer.go
145
137
  - consumer_test.go
138
+ - ext/parquet/decoder.go
139
+ - ext/parquet/decoder_test.go
140
+ - ext/parquet/go.mod
141
+ - ext/parquet/go.sum
142
+ - ext/parquet/parquet.go
143
+ - ext/parquet/parquet_test.go
144
+ - ext/parquet/reader.go
145
+ - ext/parquet/testdata/alltypes_plain.parquet
146
+ - ext/parquet/types.go
146
147
  - feedx.gemspec
147
148
  - feedx.go
148
149
  - feedx_test.go
@@ -202,7 +203,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
202
203
  requirements:
203
204
  - - ">="
204
205
  - !ruby/object:Gem::Version
205
- version: '2.4'
206
+ version: '2.6'
206
207
  required_rubygems_version: !ruby/object:Gem::Requirement
207
208
  requirements:
208
209
  - - ">="
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- matrix:
2
- include:
3
- - language: ruby
4
- rvm:
5
- - 2.7
6
- before_install:
7
- - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
8
- - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
9
- - sudo apt update
10
- - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
11
- - language: ruby
12
- rvm:
13
- - 2.6
14
- before_install:
15
- - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
16
- - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
17
- - sudo apt update
18
- - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
19
- - language: go
20
- go:
21
- - 1.14.x
22
- - language: go
23
- go:
24
- - 1.13.x