feedx 0.10.2 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +2 -0
  5. data/.travis.yml +12 -2
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +50 -30
  8. data/Makefile +10 -5
  9. data/compression.go +18 -0
  10. data/compression_test.go +12 -0
  11. data/consumer_test.go +5 -4
  12. data/ext/parquet/decoder.go +170 -0
  13. data/ext/parquet/decoder_test.go +88 -0
  14. data/ext/parquet/go.mod +12 -0
  15. data/ext/parquet/go.sum +134 -0
  16. data/ext/parquet/parquet.go +78 -0
  17. data/ext/parquet/parquet_test.go +28 -0
  18. data/ext/parquet/reader.go +89 -0
  19. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  20. data/ext/parquet/types.go +51 -0
  21. data/feedx.gemspec +3 -2
  22. data/feedx_test.go +8 -24
  23. data/format.go +50 -20
  24. data/format_test.go +8 -6
  25. data/go.mod +9 -11
  26. data/go.sum +76 -28
  27. data/internal/testdata/testdata.pb.go +223 -0
  28. data/internal/testdata/testdata.proto +15 -0
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression.rb +11 -4
  32. data/lib/feedx/compression/abstract.rb +2 -2
  33. data/lib/feedx/compression/gzip.rb +14 -16
  34. data/lib/feedx/compression/none.rb +4 -4
  35. data/lib/feedx/consumer.rb +15 -9
  36. data/lib/feedx/format.rb +18 -9
  37. data/lib/feedx/format/abstract.rb +42 -13
  38. data/lib/feedx/format/json.rb +12 -8
  39. data/lib/feedx/format/parquet.rb +102 -0
  40. data/lib/feedx/format/protobuf.rb +16 -8
  41. data/lib/feedx/producer.rb +27 -22
  42. data/lib/feedx/stream.rb +36 -23
  43. data/producer_test.go +1 -2
  44. data/reader_test.go +6 -6
  45. data/spec/feedx/compression/gzip_spec.rb +2 -2
  46. data/spec/feedx/compression/none_spec.rb +2 -2
  47. data/spec/feedx/compression_spec.rb +9 -9
  48. data/spec/feedx/consumer_spec.rb +1 -1
  49. data/spec/feedx/format/abstract_spec.rb +11 -8
  50. data/spec/feedx/format/json_spec.rb +17 -16
  51. data/spec/feedx/format/parquet_spec.rb +30 -0
  52. data/spec/feedx/format/protobuf_spec.rb +12 -11
  53. data/spec/feedx/format_spec.rb +8 -8
  54. data/spec/feedx/producer_spec.rb +6 -0
  55. data/spec/feedx/stream_spec.rb +43 -6
  56. data/spec/spec_helper.rb +17 -1
  57. metadata +33 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73577178d9531fd5397bca23cb9e85cb0d6ab8ea9f97e32b0ce5b97dbebe43d4
4
- data.tar.gz: e1439be97329b54309831cd7c19c19982058e71879dbc612058f0bb5fce1a578
3
+ metadata.gz: c600f733aa3a0663939488c86cfd1d29abb4dde277ceb582a3e3d35c970830fd
4
+ data.tar.gz: 4ab6bbe4a53e31c4c2c408670f82a72fceb63128c0a22c7f57d0bf8c0eca95d2
5
5
  SHA512:
6
- metadata.gz: 673e28b0f7d0e01543796f7aa5ea869a80e119c2fbbcbb21eb9d6918ea4e051ecc20e6f1634e79fcc56acf1991ae68b26f7f69e1ec43c47a8b96d8a359d20834
7
- data.tar.gz: f2b9d69ef7dbc915c4a08c798e0da5f65a044907b27c3e742a2c4a7c1a1b5c514e648fc1924a335b99ca65d56454718b0a9041827306f6f19ce0a7c2d6507a97
6
+ metadata.gz: b3e1ef899a5dfafabad3eb45ebb56c672fc9348e52455e04c054967ea3e75b15e2cba3b74449868104204fa3bbf9d4bd4ff3e8c4b1e4db74cc8349982359e1f6
7
+ data.tar.gz: 3722d116710b5b16315af4fa7ac7e052f9c87efd1531e18d15000bc747119f7dececdd9bae02cac12b9dd363b00d033f710bf9e169986f3a036aab54194e64ef
@@ -7,3 +7,6 @@ end_of_line = lf
7
7
  charset = utf-8
8
8
  trim_trailing_whitespace = true
9
9
  insert_final_newline = true
10
+
11
+ [{*.go,Makefile}]
12
+ indent_style = tab
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
1
  .rubocop-*
2
2
  pkg/
3
3
  *~
4
+ *.makefile
@@ -4,3 +4,5 @@ inherit_from:
4
4
 
5
5
  AllCops:
6
6
  TargetRubyVersion: "2.4"
7
+ Metrics/ParameterLists:
8
+ Max: 10
@@ -3,12 +3,22 @@ matrix:
3
3
  - language: ruby
4
4
  rvm:
5
5
  - 2.7
6
+ before_install:
7
+ - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
8
+ - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
9
+ - sudo apt update
10
+ - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
6
11
  - language: ruby
7
12
  rvm:
8
13
  - 2.6
14
+ before_install:
15
+ - wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
16
+ - sudo apt install -y ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
17
+ - sudo apt update
18
+ - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
9
19
  - language: go
10
20
  go:
11
- - 1.14.x
21
+ - 1.15.x
12
22
  - language: go
13
23
  go:
14
- - 1.13.x
24
+ - 1.14.x
data/Gemfile CHANGED
@@ -1,4 +1,2 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
-
4
- gem 'google-protobuf', '>= 3.7.0-rc2'
@@ -1,49 +1,69 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feedx (0.10.2)
5
- bfs (>= 0.5.0)
4
+ feedx (0.12.3)
5
+ bfs (>= 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- ast (2.4.0)
11
- bfs (0.7.0)
12
- diff-lcs (1.3)
13
- google-protobuf (3.12.2)
14
- parallel (1.19.1)
15
- parser (2.7.1.3)
16
- ast (~> 2.4.0)
17
- pbio (0.2.1)
10
+ ast (2.4.1)
11
+ bfs (0.8.0)
12
+ diff-lcs (1.4.4)
13
+ extpp (0.0.9)
14
+ gio2 (3.4.3)
15
+ gobject-introspection (= 3.4.3)
16
+ glib2 (3.4.3)
17
+ native-package-installer (>= 1.0.3)
18
+ pkg-config (>= 1.3.5)
19
+ gobject-introspection (3.4.3)
20
+ glib2 (= 3.4.3)
21
+ google-protobuf (3.14.0)
22
+ native-package-installer (1.0.9)
23
+ parallel (1.20.1)
24
+ parser (2.7.2.0)
25
+ ast (~> 2.4.1)
26
+ pbio (0.2.2)
18
27
  google-protobuf
28
+ pkg-config (1.4.4)
19
29
  rainbow (3.0.0)
20
30
  rake (13.0.1)
31
+ red-arrow (2.0.0)
32
+ extpp (>= 0.0.7)
33
+ gio2 (>= 3.3.6)
34
+ native-package-installer
35
+ pkg-config
36
+ red-parquet (2.0.0)
37
+ red-arrow (= 2.0.0)
38
+ regexp_parser (2.0.0)
21
39
  rexml (3.2.4)
22
- rspec (3.9.0)
23
- rspec-core (~> 3.9.0)
24
- rspec-expectations (~> 3.9.0)
25
- rspec-mocks (~> 3.9.0)
26
- rspec-core (3.9.2)
27
- rspec-support (~> 3.9.3)
28
- rspec-expectations (3.9.2)
40
+ rspec (3.10.0)
41
+ rspec-core (~> 3.10.0)
42
+ rspec-expectations (~> 3.10.0)
43
+ rspec-mocks (~> 3.10.0)
44
+ rspec-core (3.10.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-expectations (3.10.0)
29
47
  diff-lcs (>= 1.2.0, < 2.0)
30
- rspec-support (~> 3.9.0)
31
- rspec-mocks (3.9.1)
48
+ rspec-support (~> 3.10.0)
49
+ rspec-mocks (3.10.0)
32
50
  diff-lcs (>= 1.2.0, < 2.0)
33
- rspec-support (~> 3.9.0)
34
- rspec-support (3.9.3)
35
- rubocop (0.84.0)
51
+ rspec-support (~> 3.10.0)
52
+ rspec-support (3.10.0)
53
+ rubocop (1.4.2)
36
54
  parallel (~> 1.10)
37
- parser (>= 2.7.0.1)
55
+ parser (>= 2.7.1.5)
38
56
  rainbow (>= 2.2.2, < 4.0)
57
+ regexp_parser (>= 1.8)
39
58
  rexml
40
- rubocop-ast (>= 0.0.3)
59
+ rubocop-ast (>= 1.1.1)
41
60
  ruby-progressbar (~> 1.7)
42
61
  unicode-display_width (>= 1.4.0, < 2.0)
43
- rubocop-ast (0.0.3)
44
- parser (>= 2.7.0.1)
45
- rubocop-performance (1.6.0)
46
- rubocop (>= 0.71.0)
62
+ rubocop-ast (1.3.0)
63
+ parser (>= 2.7.1.5)
64
+ rubocop-performance (1.9.1)
65
+ rubocop (>= 0.90.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
47
67
  ruby-progressbar (1.10.1)
48
68
  unicode-display_width (1.7.0)
49
69
 
@@ -53,12 +73,12 @@ PLATFORMS
53
73
  DEPENDENCIES
54
74
  bundler
55
75
  feedx!
56
- google-protobuf (>= 3.7.0.pre.rc2)
57
76
  pbio
58
77
  rake
78
+ red-parquet (>= 1.0.0)
59
79
  rspec
60
80
  rubocop
61
81
  rubocop-performance
62
82
 
63
83
  BUNDLED WITH
64
- 2.1.2
84
+ 2.1.4
data/Makefile CHANGED
@@ -1,7 +1,12 @@
1
- default: vet test
1
+ default: test
2
2
 
3
- test:
4
- go test ./...
3
+ .common.makefile:
4
+ curl -fsSL -o $@ https://gitlab.com/bsm/misc/raw/master/make/go/common.makefile
5
5
 
6
- vet:
7
- go vet ./...
6
+ include .common.makefile
7
+
8
+ proto: internal/testdata/testdata.pb.go
9
+
10
+ %.pb.go: %.proto
11
+ # may need to `go install google.golang.org/protobuf/cmd/protoc-gen-go`
12
+ protoc -I=. --go_out=paths=source_relative:. $<
@@ -1,6 +1,7 @@
1
1
  package feedx
2
2
 
3
3
  import (
4
+ "compress/flate"
4
5
  "compress/gzip"
5
6
  "io"
6
7
  "path"
@@ -20,6 +21,8 @@ func DetectCompression(name string) Compression {
20
21
  ext := path.Ext(path.Base(name))
21
22
  if ext != "" && ext[0] == '.' && ext[len(ext)-1] == 'z' {
22
23
  return GZipCompression
24
+ } else if ext == ".flate" {
25
+ return FlateCompression
23
26
  }
24
27
  }
25
28
  return NoCompression
@@ -61,3 +64,18 @@ func (gzipCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
61
64
  func (gzipCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
62
65
  return gzip.NewWriter(w), nil
63
66
  }
67
+
68
+ // --------------------------------------------------------------------
69
+
70
+ // FlateCompression supports flate compression format.
71
+ var FlateCompression = flateCompression{}
72
+
73
+ type flateCompression struct{}
74
+
75
+ func (flateCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
76
+ return flate.NewReader(r), nil
77
+ }
78
+
79
+ func (flateCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
80
+ return flate.NewWriter(w, flate.BestSpeed)
81
+ }
@@ -41,6 +41,9 @@ var _ = Describe("Compression", func() {
41
41
  Expect(feedx.DetectCompression("/path/to/file.pb.gz")).To(Equal(feedx.GZipCompression))
42
42
  Expect(feedx.DetectCompression("/path/to/file.pbz")).To(Equal(feedx.GZipCompression))
43
43
 
44
+ Expect(feedx.DetectCompression("/path/to/file.flate")).To(Equal(feedx.FlateCompression))
45
+ Expect(feedx.DetectCompression("/path/to/file.whatever.flate")).To(Equal(feedx.FlateCompression))
46
+
44
47
  Expect(feedx.DetectCompression("")).To(Equal(feedx.NoCompression))
45
48
  Expect(feedx.DetectCompression("/path/to/file")).To(Equal(feedx.NoCompression))
46
49
  Expect(feedx.DetectCompression("/path/to/file.txt")).To(Equal(feedx.NoCompression))
@@ -63,4 +66,13 @@ var _ = Describe("Compression", func() {
63
66
  runSharedTest(subject)
64
67
  })
65
68
  })
69
+
70
+ Describe("FlateCompression", func() {
71
+ var subject = feedx.FlateCompression
72
+ var _ feedx.Compression = subject
73
+
74
+ It("should write/read", func() {
75
+ runSharedTest(subject)
76
+ })
77
+ })
66
78
  })
@@ -7,6 +7,7 @@ import (
7
7
 
8
8
  "github.com/bsm/bfs"
9
9
  "github.com/bsm/feedx"
10
+ "github.com/bsm/feedx/internal/testdata"
10
11
  . "github.com/onsi/ginkgo"
11
12
  . "github.com/onsi/gomega"
12
13
  )
@@ -22,16 +23,16 @@ var _ = Describe("Consumer", func() {
22
23
 
23
24
  var err error
24
25
  subject, err = feedx.NewConsumerForRemote(ctx, obj, nil, func(r *feedx.Reader) (interface{}, error) {
25
- var msgs []MockMessage
26
+ var msgs []*testdata.MockMessage
26
27
  for {
27
- var msg MockMessage
28
+ var msg testdata.MockMessage
28
29
  if err := r.Decode(&msg); err == io.EOF {
29
30
  break
30
31
  }
31
32
  if err != nil {
32
33
  return nil, err
33
34
  }
34
- msgs = append(msgs, msg)
35
+ msgs = append(msgs, &msg)
35
36
  }
36
37
  return msgs, nil
37
38
  })
@@ -46,7 +47,7 @@ var _ = Describe("Consumer", func() {
46
47
  Expect(subject.LastSync()).To(BeTemporally("~", time.Now(), time.Second))
47
48
  Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1515151515, 0), time.Second))
48
49
  Expect(subject.NumRead()).To(Equal(2))
49
- Expect(subject.Data()).To(Equal([]MockMessage{fixture, fixture}))
50
+ Expect(subject.Data()).To(ConsistOf(seed(), seed()))
50
51
  Expect(subject.Close()).To(Succeed())
51
52
  })
52
53
  })
@@ -0,0 +1,170 @@
1
+ package parquet
2
+
3
+ import (
4
+ "encoding/binary"
5
+ "fmt"
6
+ "io"
7
+ "reflect"
8
+ "time"
9
+
10
+ kpq "github.com/bsm/parquet-go/parquet"
11
+ )
12
+
13
+ type decoder struct {
14
+ cols []*columnReader
15
+ closers []io.Closer
16
+ }
17
+
18
+ func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
19
+ file, err := kpq.FileFromReader(rs)
20
+ if err != nil {
21
+ return nil, err
22
+ }
23
+
24
+ // normalise column names
25
+ if len(names) == 0 {
26
+ for _, c := range file.Schema.Columns() {
27
+ names = append(names, c.String())
28
+ }
29
+ }
30
+
31
+ // normalise batch size
32
+ if batchSize < 1 {
33
+ batchSize = 1000
34
+ }
35
+
36
+ // initialise column buffers
37
+ cols := make([]*columnReader, 0, len(names))
38
+ for _, name := range names {
39
+ col, ok := file.Schema.ColumnByName(name)
40
+ if !ok {
41
+ _ = file.Close()
42
+ return nil, fmt.Errorf("column %q does not exist", name)
43
+ }
44
+ cols = append(cols, newColumnReader(file, col, batchSize))
45
+ }
46
+
47
+ return &decoder{cols: cols, closers: []io.Closer{file}}, nil
48
+ }
49
+
50
+ func (w *decoder) Decode(v interface{}) error {
51
+ rv := reflect.ValueOf(v)
52
+ rt := rv.Type()
53
+ if rt.Kind() != reflect.Ptr {
54
+ return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
55
+ }
56
+
57
+ // field index by name
58
+ fidx := cachedTypeFields(rt.Elem())
59
+ elem := rv.Elem()
60
+
61
+ for _, r := range w.cols {
62
+ // next column value
63
+ val, err := r.Next()
64
+ if err != nil {
65
+ return err
66
+ }
67
+
68
+ // skip if value is NULL
69
+ if val == nil {
70
+ continue
71
+ }
72
+
73
+ // set field if exists
74
+ if fi, ok := fidx[r.Name()]; ok {
75
+ fv := elem.Field(fi)
76
+ if ok := setValue(fv, val); !ok {
77
+ return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
78
+ }
79
+ }
80
+ }
81
+
82
+ return nil
83
+ }
84
+
85
+ func (w *decoder) Close() (err error) {
86
+ for _, c := range w.closers {
87
+ if e := c.Close(); e != nil {
88
+ err = e
89
+ }
90
+ }
91
+ return
92
+ }
93
+
94
+ // --------------------------------------------------------------------
95
+
96
+ func setValue(rv reflect.Value, v interface{}) bool {
97
+ if rv.Kind() == reflect.Ptr {
98
+ if rv.IsNil() {
99
+ if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
100
+ rv.Set(ev)
101
+ return true
102
+ }
103
+ return false
104
+ }
105
+ return setValue(rv.Elem(), v)
106
+ }
107
+
108
+ switch vv := v.(type) {
109
+ case bool:
110
+ switch rv.Kind() {
111
+ case reflect.Bool:
112
+ rv.SetBool(vv)
113
+ return true
114
+ }
115
+ case []byte:
116
+ switch rv.Kind() {
117
+ case reflect.String:
118
+ rv.SetString(string(vv))
119
+ return true
120
+ case reflect.Slice:
121
+ if rv.Type() == byteSliceType {
122
+ rv.SetBytes(vv)
123
+ return true
124
+ }
125
+ }
126
+ case int, int8, int16, int32, int64:
127
+ switch rv.Kind() {
128
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
129
+ rv.SetInt(reflect.ValueOf(v).Int())
130
+ return true
131
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
132
+ rv.SetUint(uint64(reflect.ValueOf(v).Int()))
133
+ return true
134
+ }
135
+ case uint, uint8, uint16, uint32, uint64:
136
+ switch rv.Kind() {
137
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
138
+ rv.SetInt(int64(reflect.ValueOf(v).Uint()))
139
+ return true
140
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
141
+ rv.SetUint(reflect.ValueOf(v).Uint())
142
+ return true
143
+ }
144
+ case float32, float64:
145
+ switch rv.Kind() {
146
+ case reflect.Float32, reflect.Float64:
147
+ rv.SetFloat(reflect.ValueOf(v).Float())
148
+ return true
149
+ }
150
+ case kpq.Int96:
151
+ if rt := rv.Type(); rt == timeType {
152
+ ns := int64(binary.LittleEndian.Uint64(vv[:8]))
153
+ jd := int64(binary.LittleEndian.Uint32(vv[8:]))
154
+ ts := time.Unix((jd-2440588)*86400, ns)
155
+ rv.Set(reflect.ValueOf(ts))
156
+ return true
157
+ } else if rt == int96Type {
158
+ rv.Set(reflect.ValueOf(v))
159
+ return true
160
+ }
161
+ }
162
+
163
+ return false
164
+ }
165
+
166
+ var (
167
+ byteSliceType = reflect.TypeOf(([]byte)(nil))
168
+ int96Type = reflect.TypeOf(kpq.Int96{})
169
+ timeType = reflect.TypeOf(time.Time{})
170
+ )