feedx 0.10.2 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +2 -0
  5. data/.travis.yml +12 -2
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +50 -30
  8. data/Makefile +10 -5
  9. data/compression.go +18 -0
  10. data/compression_test.go +12 -0
  11. data/consumer_test.go +5 -4
  12. data/ext/parquet/decoder.go +170 -0
  13. data/ext/parquet/decoder_test.go +88 -0
  14. data/ext/parquet/go.mod +12 -0
  15. data/ext/parquet/go.sum +134 -0
  16. data/ext/parquet/parquet.go +78 -0
  17. data/ext/parquet/parquet_test.go +28 -0
  18. data/ext/parquet/reader.go +89 -0
  19. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  20. data/ext/parquet/types.go +51 -0
  21. data/feedx.gemspec +3 -2
  22. data/feedx_test.go +8 -24
  23. data/format.go +50 -20
  24. data/format_test.go +8 -6
  25. data/go.mod +9 -11
  26. data/go.sum +76 -28
  27. data/internal/testdata/testdata.pb.go +223 -0
  28. data/internal/testdata/testdata.proto +15 -0
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression.rb +11 -4
  32. data/lib/feedx/compression/abstract.rb +2 -2
  33. data/lib/feedx/compression/gzip.rb +14 -16
  34. data/lib/feedx/compression/none.rb +4 -4
  35. data/lib/feedx/consumer.rb +15 -9
  36. data/lib/feedx/format.rb +18 -9
  37. data/lib/feedx/format/abstract.rb +42 -13
  38. data/lib/feedx/format/json.rb +12 -8
  39. data/lib/feedx/format/parquet.rb +102 -0
  40. data/lib/feedx/format/protobuf.rb +16 -8
  41. data/lib/feedx/producer.rb +27 -22
  42. data/lib/feedx/stream.rb +36 -23
  43. data/producer_test.go +1 -2
  44. data/reader_test.go +6 -6
  45. data/spec/feedx/compression/gzip_spec.rb +2 -2
  46. data/spec/feedx/compression/none_spec.rb +2 -2
  47. data/spec/feedx/compression_spec.rb +9 -9
  48. data/spec/feedx/consumer_spec.rb +1 -1
  49. data/spec/feedx/format/abstract_spec.rb +11 -8
  50. data/spec/feedx/format/json_spec.rb +17 -16
  51. data/spec/feedx/format/parquet_spec.rb +30 -0
  52. data/spec/feedx/format/protobuf_spec.rb +12 -11
  53. data/spec/feedx/format_spec.rb +8 -8
  54. data/spec/feedx/producer_spec.rb +6 -0
  55. data/spec/feedx/stream_spec.rb +43 -6
  56. data/spec/spec_helper.rb +17 -1
  57. metadata +33 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73577178d9531fd5397bca23cb9e85cb0d6ab8ea9f97e32b0ce5b97dbebe43d4
4
- data.tar.gz: e1439be97329b54309831cd7c19c19982058e71879dbc612058f0bb5fce1a578
3
+ metadata.gz: c600f733aa3a0663939488c86cfd1d29abb4dde277ceb582a3e3d35c970830fd
4
+ data.tar.gz: 4ab6bbe4a53e31c4c2c408670f82a72fceb63128c0a22c7f57d0bf8c0eca95d2
5
5
  SHA512:
6
- metadata.gz: 673e28b0f7d0e01543796f7aa5ea869a80e119c2fbbcbb21eb9d6918ea4e051ecc20e6f1634e79fcc56acf1991ae68b26f7f69e1ec43c47a8b96d8a359d20834
7
- data.tar.gz: f2b9d69ef7dbc915c4a08c798e0da5f65a044907b27c3e742a2c4a7c1a1b5c514e648fc1924a335b99ca65d56454718b0a9041827306f6f19ce0a7c2d6507a97
6
+ metadata.gz: b3e1ef899a5dfafabad3eb45ebb56c672fc9348e52455e04c054967ea3e75b15e2cba3b74449868104204fa3bbf9d4bd4ff3e8c4b1e4db74cc8349982359e1f6
7
+ data.tar.gz: 3722d116710b5b16315af4fa7ac7e052f9c87efd1531e18d15000bc747119f7dececdd9bae02cac12b9dd363b00d033f710bf9e169986f3a036aab54194e64ef
@@ -7,3 +7,6 @@ end_of_line = lf
7
7
  charset = utf-8
8
8
  trim_trailing_whitespace = true
9
9
  insert_final_newline = true
10
+
11
+ [{*.go,Makefile}]
12
+ indent_style = tab
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
1
  .rubocop-*
2
2
  pkg/
3
3
  *~
4
+ *.makefile
@@ -4,3 +4,5 @@ inherit_from:
4
4
 
5
5
  AllCops:
6
6
  TargetRubyVersion: "2.4"
7
+ Metrics/ParameterLists:
8
+ Max: 10
@@ -3,12 +3,22 @@ matrix:
3
3
  - language: ruby
4
4
  rvm:
5
5
  - 2.7
6
+ before_install:
7
+ - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
8
+ - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
9
+ - sudo apt update
10
+ - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
6
11
  - language: ruby
7
12
  rvm:
8
13
  - 2.6
14
+ before_install:
15
+ - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
16
+ - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
17
+ - sudo apt update
18
+ - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
9
19
  - language: go
10
20
  go:
11
- - 1.14.x
21
+ - 1.15.x
12
22
  - language: go
13
23
  go:
14
- - 1.13.x
24
+ - 1.14.x
data/Gemfile CHANGED
@@ -1,4 +1,2 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
-
4
- gem 'google-protobuf', '>= 3.7.0-rc2'
@@ -1,49 +1,69 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feedx (0.10.2)
5
- bfs (>= 0.5.0)
4
+ feedx (0.12.3)
5
+ bfs (>= 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- ast (2.4.0)
11
- bfs (0.7.0)
12
- diff-lcs (1.3)
13
- google-protobuf (3.12.2)
14
- parallel (1.19.1)
15
- parser (2.7.1.3)
16
- ast (~> 2.4.0)
17
- pbio (0.2.1)
10
+ ast (2.4.1)
11
+ bfs (0.8.0)
12
+ diff-lcs (1.4.4)
13
+ extpp (0.0.9)
14
+ gio2 (3.4.3)
15
+ gobject-introspection (= 3.4.3)
16
+ glib2 (3.4.3)
17
+ native-package-installer (>= 1.0.3)
18
+ pkg-config (>= 1.3.5)
19
+ gobject-introspection (3.4.3)
20
+ glib2 (= 3.4.3)
21
+ google-protobuf (3.14.0)
22
+ native-package-installer (1.0.9)
23
+ parallel (1.20.1)
24
+ parser (2.7.2.0)
25
+ ast (~> 2.4.1)
26
+ pbio (0.2.2)
18
27
  google-protobuf
28
+ pkg-config (1.4.4)
19
29
  rainbow (3.0.0)
20
30
  rake (13.0.1)
31
+ red-arrow (2.0.0)
32
+ extpp (>= 0.0.7)
33
+ gio2 (>= 3.3.6)
34
+ native-package-installer
35
+ pkg-config
36
+ red-parquet (2.0.0)
37
+ red-arrow (= 2.0.0)
38
+ regexp_parser (2.0.0)
21
39
  rexml (3.2.4)
22
- rspec (3.9.0)
23
- rspec-core (~> 3.9.0)
24
- rspec-expectations (~> 3.9.0)
25
- rspec-mocks (~> 3.9.0)
26
- rspec-core (3.9.2)
27
- rspec-support (~> 3.9.3)
28
- rspec-expectations (3.9.2)
40
+ rspec (3.10.0)
41
+ rspec-core (~> 3.10.0)
42
+ rspec-expectations (~> 3.10.0)
43
+ rspec-mocks (~> 3.10.0)
44
+ rspec-core (3.10.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-expectations (3.10.0)
29
47
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-mocks (3.9.1)
48
+ rspec-support (~> 3.10.0)
49
+ rspec-mocks (3.10.0)
32
50
  diff-lcs (>= 1.2.0, < 2.0)
33
- rspec-support (~> 3.9.0)
34
- rspec-support (3.9.3)
35
- rubocop (0.84.0)
51
+ rspec-support (~> 3.10.0)
52
+ rspec-support (3.10.0)
53
+ rubocop (1.4.2)
36
54
  parallel (~> 1.10)
37
- parser (>= 2.7.0.1)
55
+ parser (>= 2.7.1.5)
38
56
  rainbow (>= 2.2.2, < 4.0)
57
+ regexp_parser (>= 1.8)
39
58
  rexml
40
- rubocop-ast (>= 0.0.3)
59
+ rubocop-ast (>= 1.1.1)
41
60
  ruby-progressbar (~> 1.7)
42
61
  unicode-display_width (>= 1.4.0, < 2.0)
43
- rubocop-ast (0.0.3)
44
- parser (>= 2.7.0.1)
45
- rubocop-performance (1.6.0)
46
- rubocop (>= 0.71.0)
62
+ rubocop-ast (1.3.0)
63
+ parser (>= 2.7.1.5)
64
+ rubocop-performance (1.9.1)
65
+ rubocop (>= 0.90.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
47
67
  ruby-progressbar (1.10.1)
48
68
  unicode-display_width (1.7.0)
49
69
 
@@ -53,12 +73,12 @@ PLATFORMS
53
73
  DEPENDENCIES
54
74
  bundler
55
75
  feedx!
56
- google-protobuf (>= 3.7.0.pre.rc2)
57
76
  pbio
58
77
  rake
78
+ red-parquet (>= 1.0.0)
59
79
  rspec
60
80
  rubocop
61
81
  rubocop-performance
62
82
 
63
83
  BUNDLED WITH
64
- 2.1.2
84
+ 2.1.4
data/Makefile CHANGED
@@ -1,7 +1,12 @@
1
- default: vet test
1
+ default: test
2
2
 
3
- test:
4
- go test ./...
3
+ .common.makefile:
4
+ curl -fsSL -o $@ https://gitlab.com/bsm/misc/raw/master/make/go/common.makefile
5
5
 
6
- vet:
7
- go vet ./...
6
+ include .common.makefile
7
+
8
+ proto: internal/testdata/testdata.pb.go
9
+
10
+ %.pb.go: %.proto
11
+ # may need to `go install google.golang.org/protobuf/cmd/protoc-gen-go`
12
+ protoc -I=. --go_out=paths=source_relative:. $<
@@ -1,6 +1,7 @@
1
1
  package feedx
2
2
 
3
3
  import (
4
+ "compress/flate"
4
5
  "compress/gzip"
5
6
  "io"
6
7
  "path"
@@ -20,6 +21,8 @@ func DetectCompression(name string) Compression {
20
21
  ext := path.Ext(path.Base(name))
21
22
  if ext != "" && ext[0] == '.' && ext[len(ext)-1] == 'z' {
22
23
  return GZipCompression
24
+ } else if ext == ".flate" {
25
+ return FlateCompression
23
26
  }
24
27
  }
25
28
  return NoCompression
@@ -61,3 +64,18 @@ func (gzipCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
61
64
  func (gzipCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
62
65
  return gzip.NewWriter(w), nil
63
66
  }
67
+
68
+ // --------------------------------------------------------------------
69
+
70
// FlateCompression supports flate compression format.
var FlateCompression = flateCompression{}

// flateCompression reads and writes streams through the standard library's
// compress/flate package.
type flateCompression struct{}

// NewReader wraps r in a flate reader. The returned error is always nil.
func (flateCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
	return flate.NewReader(r), nil
}

// NewWriter wraps w in a flate writer using the flate.BestSpeed level.
// The caller must Close the returned writer to flush pending data.
func (flateCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
	fw, err := flate.NewWriter(w, flate.BestSpeed)
	if err != nil {
		// Return an untyped nil: forwarding the (*flate.Writer)(nil) result
		// would hand callers a non-nil interface wrapping a nil pointer.
		return nil, err
	}
	return fw, nil
}
@@ -41,6 +41,9 @@ var _ = Describe("Compression", func() {
41
41
  Expect(feedx.DetectCompression("/path/to/file.pb.gz")).To(Equal(feedx.GZipCompression))
42
42
  Expect(feedx.DetectCompression("/path/to/file.pbz")).To(Equal(feedx.GZipCompression))
43
43
 
44
+ Expect(feedx.DetectCompression("/path/to/file.flate")).To(Equal(feedx.FlateCompression))
45
+ Expect(feedx.DetectCompression("/path/to/file.whatever.flate")).To(Equal(feedx.FlateCompression))
46
+
44
47
  Expect(feedx.DetectCompression("")).To(Equal(feedx.NoCompression))
45
48
  Expect(feedx.DetectCompression("/path/to/file")).To(Equal(feedx.NoCompression))
46
49
  Expect(feedx.DetectCompression("/path/to/file.txt")).To(Equal(feedx.NoCompression))
@@ -63,4 +66,13 @@ var _ = Describe("Compression", func() {
63
66
  runSharedTest(subject)
64
67
  })
65
68
  })
69
+
70
+ Describe("FlateCompression", func() {
71
+ var subject = feedx.FlateCompression
72
+ var _ feedx.Compression = subject
73
+
74
+ It("should write/read", func() {
75
+ runSharedTest(subject)
76
+ })
77
+ })
66
78
  })
@@ -7,6 +7,7 @@ import (
7
7
 
8
8
  "github.com/bsm/bfs"
9
9
  "github.com/bsm/feedx"
10
+ "github.com/bsm/feedx/internal/testdata"
10
11
  . "github.com/onsi/ginkgo"
11
12
  . "github.com/onsi/gomega"
12
13
  )
@@ -22,16 +23,16 @@ var _ = Describe("Consumer", func() {
22
23
 
23
24
  var err error
24
25
  subject, err = feedx.NewConsumerForRemote(ctx, obj, nil, func(r *feedx.Reader) (interface{}, error) {
25
- var msgs []MockMessage
26
+ var msgs []*testdata.MockMessage
26
27
  for {
27
- var msg MockMessage
28
+ var msg testdata.MockMessage
28
29
  if err := r.Decode(&msg); err == io.EOF {
29
30
  break
30
31
  }
31
32
  if err != nil {
32
33
  return nil, err
33
34
  }
34
- msgs = append(msgs, msg)
35
+ msgs = append(msgs, &msg)
35
36
  }
36
37
  return msgs, nil
37
38
  })
@@ -46,7 +47,7 @@ var _ = Describe("Consumer", func() {
46
47
  Expect(subject.LastSync()).To(BeTemporally("~", time.Now(), time.Second))
47
48
  Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1515151515, 0), time.Second))
48
49
  Expect(subject.NumRead()).To(Equal(2))
49
- Expect(subject.Data()).To(Equal([]MockMessage{fixture, fixture}))
50
+ Expect(subject.Data()).To(ConsistOf(seed(), seed()))
50
51
  Expect(subject.Close()).To(Succeed())
51
52
  })
52
53
  })
@@ -0,0 +1,170 @@
1
+ package parquet
2
+
3
+ import (
4
+ "encoding/binary"
5
+ "fmt"
6
+ "io"
7
+ "reflect"
8
+ "time"
9
+
10
+ kpq "github.com/bsm/parquet-go/parquet"
11
+ )
12
+
13
+ type decoder struct {
14
+ cols []*columnReader
15
+ closers []io.Closer
16
+ }
17
+
18
+ func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
19
+ file, err := kpq.FileFromReader(rs)
20
+ if err != nil {
21
+ return nil, err
22
+ }
23
+
24
+ // normalise column names
25
+ if len(names) == 0 {
26
+ for _, c := range file.Schema.Columns() {
27
+ names = append(names, c.String())
28
+ }
29
+ }
30
+
31
+ // normalise batch size
32
+ if batchSize < 1 {
33
+ batchSize = 1000
34
+ }
35
+
36
+ // initialise column buffers
37
+ cols := make([]*columnReader, 0, len(names))
38
+ for _, name := range names {
39
+ col, ok := file.Schema.ColumnByName(name)
40
+ if !ok {
41
+ _ = file.Close()
42
+ return nil, fmt.Errorf("column %q does not exist", name)
43
+ }
44
+ cols = append(cols, newColumnReader(file, col, batchSize))
45
+ }
46
+
47
+ return &decoder{cols: cols, closers: []io.Closer{file}}, nil
48
+ }
49
+
50
+ func (w *decoder) Decode(v interface{}) error {
51
+ rv := reflect.ValueOf(v)
52
+ rt := rv.Type()
53
+ if rt.Kind() != reflect.Ptr {
54
+ return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
55
+ }
56
+
57
+ // field index by name
58
+ fidx := cachedTypeFields(rt.Elem())
59
+ elem := rv.Elem()
60
+
61
+ for _, r := range w.cols {
62
+ // next column value
63
+ val, err := r.Next()
64
+ if err != nil {
65
+ return err
66
+ }
67
+
68
+ // skip if value is NULL
69
+ if val == nil {
70
+ continue
71
+ }
72
+
73
+ // set field if exists
74
+ if fi, ok := fidx[r.Name()]; ok {
75
+ fv := elem.Field(fi)
76
+ if ok := setValue(fv, val); !ok {
77
+ return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
78
+ }
79
+ }
80
+ }
81
+
82
+ return nil
83
+ }
84
+
85
+ func (w *decoder) Close() (err error) {
86
+ for _, c := range w.closers {
87
+ if e := c.Close(); e != nil {
88
+ err = e
89
+ }
90
+ }
91
+ return
92
+ }
93
+
94
+ // --------------------------------------------------------------------
95
+
96
+ func setValue(rv reflect.Value, v interface{}) bool {
97
+ if rv.Kind() == reflect.Ptr {
98
+ if rv.IsNil() {
99
+ if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
100
+ rv.Set(ev)
101
+ return true
102
+ }
103
+ return false
104
+ }
105
+ return setValue(rv.Elem(), v)
106
+ }
107
+
108
+ switch vv := v.(type) {
109
+ case bool:
110
+ switch rv.Kind() {
111
+ case reflect.Bool:
112
+ rv.SetBool(vv)
113
+ return true
114
+ }
115
+ case []byte:
116
+ switch rv.Kind() {
117
+ case reflect.String:
118
+ rv.SetString(string(vv))
119
+ return true
120
+ case reflect.Slice:
121
+ if rv.Type() == byteSliceType {
122
+ rv.SetBytes(vv)
123
+ return true
124
+ }
125
+ }
126
+ case int, int8, int16, int32, int64:
127
+ switch rv.Kind() {
128
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
129
+ rv.SetInt(reflect.ValueOf(v).Int())
130
+ return true
131
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
132
+ rv.SetUint(uint64(reflect.ValueOf(v).Int()))
133
+ return true
134
+ }
135
+ case uint, uint8, uint16, uint32, uint64:
136
+ switch rv.Kind() {
137
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
138
+ rv.SetInt(int64(reflect.ValueOf(v).Uint()))
139
+ return true
140
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
141
+ rv.SetUint(reflect.ValueOf(v).Uint())
142
+ return true
143
+ }
144
+ case float32, float64:
145
+ switch rv.Kind() {
146
+ case reflect.Float32, reflect.Float64:
147
+ rv.SetFloat(reflect.ValueOf(v).Float())
148
+ return true
149
+ }
150
+ case kpq.Int96:
151
+ if rt := rv.Type(); rt == timeType {
152
+ ns := int64(binary.LittleEndian.Uint64(vv[:8]))
153
+ jd := int64(binary.LittleEndian.Uint32(vv[8:]))
154
+ ts := time.Unix((jd-2440588)*86400, ns)
155
+ rv.Set(reflect.ValueOf(ts))
156
+ return true
157
+ } else if rt == int96Type {
158
+ rv.Set(reflect.ValueOf(v))
159
+ return true
160
+ }
161
+ }
162
+
163
+ return false
164
+ }
165
+
166
+ var (
167
+ byteSliceType = reflect.TypeOf(([]byte)(nil))
168
+ int96Type = reflect.TypeOf(kpq.Int96{})
169
+ timeType = reflect.TypeOf(time.Time{})
170
+ )