feedx 0.11.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.github/workflows/test.yml +60 -0
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +15 -4
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +80 -50
  8. data/Makefile +6 -6
  9. data/README.md +1 -1
  10. data/compression.go +18 -0
  11. data/compression_test.go +14 -2
  12. data/consumer_test.go +2 -2
  13. data/ext/parquet/decoder.go +170 -0
  14. data/ext/parquet/decoder_test.go +88 -0
  15. data/ext/parquet/go.mod +10 -0
  16. data/ext/parquet/go.sum +152 -0
  17. data/ext/parquet/parquet.go +78 -0
  18. data/ext/parquet/parquet_test.go +28 -0
  19. data/ext/parquet/reader.go +89 -0
  20. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  21. data/ext/parquet/types.go +51 -0
  22. data/feedx.gemspec +5 -6
  23. data/feedx_test.go +2 -2
  24. data/format.go +45 -15
  25. data/format_test.go +4 -2
  26. data/go.mod +10 -5
  27. data/go.sum +90 -25
  28. data/internal/testdata/testdata.pb.go +176 -77
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression/abstract.rb +2 -2
  32. data/lib/feedx/compression/gzip.rb +2 -2
  33. data/lib/feedx/compression/none.rb +2 -2
  34. data/lib/feedx/consumer.rb +15 -9
  35. data/lib/feedx/format.rb +4 -1
  36. data/lib/feedx/producer.rb +27 -22
  37. data/lib/feedx/stream.rb +30 -13
  38. data/producer_test.go +2 -2
  39. data/reader_test.go +2 -2
  40. data/spec/feedx/cache/memory_spec.rb +2 -2
  41. data/spec/feedx/cache/value_spec.rb +1 -1
  42. data/spec/feedx/compression/gzip_spec.rb +1 -1
  43. data/spec/feedx/compression/none_spec.rb +1 -1
  44. data/spec/feedx/compression_spec.rb +2 -2
  45. data/spec/feedx/consumer_spec.rb +5 -4
  46. data/spec/feedx/format/abstract_spec.rb +2 -1
  47. data/spec/feedx/format/json_spec.rb +6 -6
  48. data/spec/feedx/format/parquet_spec.rb +1 -1
  49. data/spec/feedx/format/protobuf_spec.rb +1 -1
  50. data/spec/feedx/format_spec.rb +2 -2
  51. data/spec/feedx/producer_spec.rb +15 -8
  52. data/spec/feedx/stream_spec.rb +36 -18
  53. data/writer_test.go +2 -2
  54. metadata +24 -23
  55. data/.travis.yml +0 -24
@@ -4,7 +4,7 @@ RSpec.describe Feedx::Format::Protobuf do
4
4
  let(:wio) { StringIO.new }
5
5
  let(:rio) { StringIO.open(wio.string) }
6
6
 
7
- it 'should encode/decode' do
7
+ it 'encode/decodes' do
8
8
  subject.encoder wio do |enc|
9
9
  enc.encode(Feedx::TestCase::Model.new('X'))
10
10
  enc.encode(Feedx::TestCase::Model.new('Y'))
@@ -1,13 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
- it 'should resolve' do
4
+ it 'resolves' do
5
5
  expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
6
  expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
- it 'should detect' do
10
+ it 'detects' do
11
11
  expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
12
  expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
13
  expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
@@ -6,39 +6,40 @@ RSpec.describe Feedx::Producer do
6
6
  end
7
7
 
8
8
  let(:bucket) { BFS::Bucket::InMem.new }
9
+
9
10
  before { allow(BFS).to receive(:resolve).and_return(bucket) }
10
11
 
11
- it 'should reject invalid inputs' do
12
+ it 'rejects invalid inputs' do
12
13
  expect do
13
14
  described_class.perform 'mock:///dir/file.txt', enum: enumerable
14
15
  end.to raise_error(/unable to detect format/)
15
16
  end
16
17
 
17
- it 'should push compressed JSON' do
18
+ it 'pushes compressed JSON' do
18
19
  size = described_class.perform 'mock:///dir/file.jsonz', enum: enumerable
19
20
  expect(size).to be_within(20).of(166)
20
21
  expect(bucket.info('dir/file.jsonz').size).to eq(size)
21
22
  end
22
23
 
23
- it 'should push plain JSON' do
24
+ it 'pushes plain JSON' do
24
25
  size = described_class.perform 'mock:///dir/file.json', enum: enumerable
25
26
  expect(size).to eq(15900)
26
27
  expect(bucket.info('dir/file.json').size).to eq(size)
27
28
  end
28
29
 
29
- it 'should push compressed PB' do
30
+ it 'pushes compressed PB' do
30
31
  size = described_class.perform 'mock:///dir/file.pbz', enum: enumerable
31
32
  expect(size).to be_within(20).of(41)
32
33
  expect(bucket.info('dir/file.pbz').size).to eq(size)
33
34
  end
34
35
 
35
- it 'should push plain PB' do
36
+ it 'pushes plain PB' do
36
37
  size = described_class.perform 'mock:///dir/file.pb', enum: enumerable
37
38
  expect(size).to eq(1200)
38
39
  expect(bucket.info('dir/file.pb').size).to eq(size)
39
40
  end
40
41
 
41
- it 'should support factories' do
42
+ it 'supports factories' do
42
43
  size = described_class.perform('mock:///dir/file.json') do
43
44
  enumerable
44
45
  end
@@ -46,12 +47,12 @@ RSpec.describe Feedx::Producer do
46
47
  expect(bucket.info('dir/file.json').size).to eq(size)
47
48
  end
48
49
 
49
- it 'should support last-modified' do
50
+ it 'supports last-modified' do
50
51
  described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
51
52
  expect(bucket.info('dir/file.json').metadata).to eq('X-Feedx-Last-Modified' => '1515151515000')
52
53
  end
53
54
 
54
- it 'should perform conditionally' do
55
+ it 'performs conditionally' do
55
56
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
56
57
  expect(size).to eq(15900)
57
58
 
@@ -64,4 +65,10 @@ RSpec.describe Feedx::Producer do
64
65
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
65
66
  expect(size).to eq(15900)
66
67
  end
68
+
69
+ it 'accepts downstream options' do
70
+ expect do
71
+ described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
72
+ end.not_to raise_error
73
+ end
67
74
  end
@@ -1,19 +1,22 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Stream do
4
+ subject { described_class.new('mock:///dir/file.json') }
5
+
4
6
  let(:bucket) { BFS::Bucket::InMem.new }
7
+ let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
8
+
5
9
  before { allow(BFS).to receive(:resolve).and_return(bucket) }
6
10
 
7
- subject { described_class.new('mock:///dir/file.json') }
8
- let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
11
+ after { subject.close; compressed.close }
9
12
 
10
- it 'should reject invalid inputs' do
13
+ it 'rejects invalid inputs' do
11
14
  expect do
12
15
  described_class.new('mock:///dir/file.txt')
13
16
  end.to raise_error(/unable to detect format/)
14
17
  end
15
18
 
16
- it 'should accept custom formats' do
19
+ it 'accepts custom formats' do
17
20
  format = Class.new do
18
21
  def encoder(io, &block)
19
22
  Feedx::Format::JSON::Encoder.open(io, &block)
@@ -24,27 +27,30 @@ RSpec.describe Feedx::Stream do
24
27
  end
25
28
  end
26
29
 
27
- stream = described_class.new('mock:///dir/file.txt', format: format.new)
28
- stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
30
+ result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
31
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
32
+ 21
33
+ end
34
+ expect(result).to eq(21)
29
35
 
30
- expect(bucket.read('dir/file.txt')).to eq(
31
- %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
32
- )
36
+ expect(bucket.read('dir/file.txt')).to eq(<<~JSON)
37
+ {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
38
+ JSON
33
39
  end
34
40
 
35
- it 'should encode' do
41
+ it 'encodes' do
36
42
  subject.create do |s|
37
43
  s.encode(Feedx::TestCase::Model.new('X'))
38
44
  s.encode(Feedx::TestCase::Model.new('Y'))
39
45
  end
40
46
 
41
- expect(bucket.read('dir/file.json')).to eq(
42
- %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
43
- %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
44
- )
47
+ expect(bucket.read('dir/file.json')).to eq(<<~JSON)
48
+ {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
49
+ {"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
50
+ JSON
45
51
  end
46
52
 
47
- it 'should encode compressed' do
53
+ it 'encodes compressed' do
48
54
  compressed.create do |s|
49
55
  100.times do
50
56
  s.encode(Feedx::TestCase::Model.new('X'))
@@ -54,14 +60,26 @@ RSpec.describe Feedx::Stream do
54
60
  expect(bucket.info('dir/file.json.gz').size).to be_within(10).of(108)
55
61
  end
56
62
 
57
- it 'should encode with create options' do
63
+ it 'encodes with create options' do
58
64
  subject.create metadata: { 'x' => '5' } do |s|
59
65
  s.encode(Feedx::TestCase::Model.new('X'))
60
66
  end
61
67
  expect(bucket.info('dir/file.json').metadata).to eq('X' => '5')
62
68
  end
63
69
 
64
- it 'should decode' do
70
+ it 'aborts encode on errors (if compressed)' do
71
+ stop = RuntimeError.new('STOP')
72
+ expect do
73
+ compressed.create do |s|
74
+ s.encode(Feedx::TestCase::Model.new('X'))
75
+ raise stop
76
+ end
77
+ end.to raise_error(stop)
78
+
79
+ expect(bucket.ls('**').to_a).to be_empty
80
+ end
81
+
82
+ it 'decodes' do
65
83
  subject.create do |s|
66
84
  s.encode(Feedx::TestCase::Model.new('X'))
67
85
  s.encode(Feedx::TestCase::Model.new('Y'))
@@ -75,7 +93,7 @@ RSpec.describe Feedx::Stream do
75
93
  end
76
94
  end
77
95
 
78
- it 'should decode compressed' do
96
+ it 'decodes compressed' do
79
97
  compressed.create do |s|
80
98
  s.encode(Feedx::TestCase::Model.new('X'))
81
99
  s.encode(Feedx::TestCase::Model.new('Y'))
data/writer_test.go CHANGED
@@ -7,8 +7,8 @@ import (
7
7
 
8
8
  "github.com/bsm/bfs"
9
9
  "github.com/bsm/feedx"
10
- . "github.com/onsi/ginkgo"
11
- . "github.com/onsi/gomega"
10
+ . "github.com/bsm/ginkgo"
11
+ . "github.com/bsm/gomega"
12
12
  )
13
13
 
14
14
  var _ = Describe("Writer", func() {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-16 00:00:00.000000000 Z
11
+ date: 2021-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.5.0
19
+ version: 0.8.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.5.0
26
+ version: 0.8.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -72,30 +72,22 @@ dependencies:
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
75
+ version: '3.0'
76
+ - - "<"
81
77
  - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: rspec
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
78
+ version: '4.0'
90
79
  type: :development
91
80
  prerelease: false
92
81
  version_requirements: !ruby/object:Gem::Requirement
93
82
  requirements:
94
83
  - - ">="
95
84
  - !ruby/object:Gem::Version
96
- version: '0'
85
+ version: '3.0'
86
+ - - "<"
87
+ - !ruby/object:Gem::Version
88
+ version: '4.0'
97
89
  - !ruby/object:Gem::Dependency
98
- name: rubocop
90
+ name: rspec
99
91
  requirement: !ruby/object:Gem::Requirement
100
92
  requirements:
101
93
  - - ">="
@@ -109,7 +101,7 @@ dependencies:
109
101
  - !ruby/object:Gem::Version
110
102
  version: '0'
111
103
  - !ruby/object:Gem::Dependency
112
- name: rubocop-performance
104
+ name: rubocop-bsm
113
105
  requirement: !ruby/object:Gem::Requirement
114
106
  requirements:
115
107
  - - ">="
@@ -130,9 +122,9 @@ extensions: []
130
122
  extra_rdoc_files: []
131
123
  files:
132
124
  - ".editorconfig"
125
+ - ".github/workflows/test.yml"
133
126
  - ".gitignore"
134
127
  - ".rubocop.yml"
135
- - ".travis.yml"
136
128
  - Gemfile
137
129
  - Gemfile.lock
138
130
  - LICENSE
@@ -143,6 +135,15 @@ files:
143
135
  - compression_test.go
144
136
  - consumer.go
145
137
  - consumer_test.go
138
+ - ext/parquet/decoder.go
139
+ - ext/parquet/decoder_test.go
140
+ - ext/parquet/go.mod
141
+ - ext/parquet/go.sum
142
+ - ext/parquet/parquet.go
143
+ - ext/parquet/parquet_test.go
144
+ - ext/parquet/reader.go
145
+ - ext/parquet/testdata/alltypes_plain.parquet
146
+ - ext/parquet/types.go
146
147
  - feedx.gemspec
147
148
  - feedx.go
148
149
  - feedx_test.go
@@ -202,7 +203,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
202
203
  requirements:
203
204
  - - ">="
204
205
  - !ruby/object:Gem::Version
205
- version: '2.4'
206
+ version: '2.6'
206
207
  required_rubygems_version: !ruby/object:Gem::Requirement
207
208
  requirements:
208
209
  - - ">="
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- matrix:
2
- include:
3
- - language: ruby
4
- rvm:
5
- - 2.7
6
- before_install:
7
- - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
8
- - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
9
- - sudo apt update
10
- - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
11
- - language: ruby
12
- rvm:
13
- - 2.6
14
- before_install:
15
- - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
16
- - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
17
- - sudo apt update
18
- - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
19
- - language: go
20
- go:
21
- - 1.14.x
22
- - language: go
23
- go:
24
- - 1.13.x