feedx 0.12.2 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 898f80c359182952ab192f95fb05dc1e6ae806f0d661d71244813b779595d0f8
4
- data.tar.gz: 43778c6084131e8ffa9ba59b7836fe198f7595e5dc07af1c4e11a9b03ac49419
3
+ metadata.gz: c600f733aa3a0663939488c86cfd1d29abb4dde277ceb582a3e3d35c970830fd
4
+ data.tar.gz: 4ab6bbe4a53e31c4c2c408670f82a72fceb63128c0a22c7f57d0bf8c0eca95d2
5
5
  SHA512:
6
- metadata.gz: fa8f2e4e26c140bb4c2732655ac73c39ef20055f215ac39c8c62bd77e7cf27d7d8140287ed6c90a4c4da6bf461d491123f76784ce5d770189a45da7fea897965
7
- data.tar.gz: a35d5296c5aec452b450e95985535c98b6b99f1a872624b3ad3974cbb0f56b0e6d0ae8395edd849c97242ae0a77ee77537404cb3dce22882a3a310a6e0f04a04
6
+ metadata.gz: b3e1ef899a5dfafabad3eb45ebb56c672fc9348e52455e04c054967ea3e75b15e2cba3b74449868104204fa3bbf9d4bd4ff3e8c4b1e4db74cc8349982359e1f6
7
+ data.tar.gz: 3722d116710b5b16315af4fa7ac7e052f9c87efd1531e18d15000bc747119f7dececdd9bae02cac12b9dd363b00d033f710bf9e169986f3a036aab54194e64ef
@@ -7,3 +7,6 @@ end_of_line = lf
7
7
  charset = utf-8
8
8
  trim_trailing_whitespace = true
9
9
  insert_final_newline = true
10
+
11
+ [{*.go,Makefile}]
12
+ indent_style = tab
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
1
  .rubocop-*
2
2
  pkg/
3
3
  *~
4
+ *.makefile
@@ -18,7 +18,7 @@ matrix:
18
18
  - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
19
19
  - language: go
20
20
  go:
21
- - 1.14.x
21
+ - 1.15.x
22
22
  - language: go
23
23
  go:
24
- - 1.13.x
24
+ - 1.14.x
data/Gemfile CHANGED
@@ -1,4 +1,2 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
-
4
- gem 'google-protobuf', '>= 3.7.0-rc2'
@@ -1,16 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feedx (0.12.2)
5
- bfs (>= 0.5.0)
4
+ feedx (0.12.3)
5
+ bfs (>= 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
10
  ast (2.4.1)
11
- bfs (0.7.2)
11
+ bfs (0.8.0)
12
12
  diff-lcs (1.4.4)
13
- extpp (0.0.8)
13
+ extpp (0.0.9)
14
14
  gio2 (3.4.3)
15
15
  gobject-introspection (= 3.4.3)
16
16
  glib2 (3.4.3)
@@ -18,51 +18,52 @@ GEM
18
18
  pkg-config (>= 1.3.5)
19
19
  gobject-introspection (3.4.3)
20
20
  glib2 (= 3.4.3)
21
- google-protobuf (3.12.2)
21
+ google-protobuf (3.14.0)
22
22
  native-package-installer (1.0.9)
23
- parallel (1.19.2)
24
- parser (2.7.1.4)
23
+ parallel (1.20.1)
24
+ parser (2.7.2.0)
25
25
  ast (~> 2.4.1)
26
- pbio (0.2.1)
26
+ pbio (0.2.2)
27
27
  google-protobuf
28
- pkg-config (1.4.1)
28
+ pkg-config (1.4.4)
29
29
  rainbow (3.0.0)
30
30
  rake (13.0.1)
31
- red-arrow (0.17.1)
31
+ red-arrow (2.0.0)
32
32
  extpp (>= 0.0.7)
33
33
  gio2 (>= 3.3.6)
34
34
  native-package-installer
35
35
  pkg-config
36
- red-parquet (0.17.1)
37
- red-arrow (= 0.17.1)
38
- regexp_parser (1.7.1)
36
+ red-parquet (2.0.0)
37
+ red-arrow (= 2.0.0)
38
+ regexp_parser (2.0.0)
39
39
  rexml (3.2.4)
40
- rspec (3.9.0)
41
- rspec-core (~> 3.9.0)
42
- rspec-expectations (~> 3.9.0)
43
- rspec-mocks (~> 3.9.0)
44
- rspec-core (3.9.2)
45
- rspec-support (~> 3.9.3)
46
- rspec-expectations (3.9.2)
40
+ rspec (3.10.0)
41
+ rspec-core (~> 3.10.0)
42
+ rspec-expectations (~> 3.10.0)
43
+ rspec-mocks (~> 3.10.0)
44
+ rspec-core (3.10.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-expectations (3.10.0)
47
47
  diff-lcs (>= 1.2.0, < 2.0)
48
- rspec-support (~> 3.9.0)
49
- rspec-mocks (3.9.1)
48
+ rspec-support (~> 3.10.0)
49
+ rspec-mocks (3.10.0)
50
50
  diff-lcs (>= 1.2.0, < 2.0)
51
- rspec-support (~> 3.9.0)
52
- rspec-support (3.9.3)
53
- rubocop (0.86.0)
51
+ rspec-support (~> 3.10.0)
52
+ rspec-support (3.10.0)
53
+ rubocop (1.4.2)
54
54
  parallel (~> 1.10)
55
- parser (>= 2.7.0.1)
55
+ parser (>= 2.7.1.5)
56
56
  rainbow (>= 2.2.2, < 4.0)
57
- regexp_parser (>= 1.7)
57
+ regexp_parser (>= 1.8)
58
58
  rexml
59
- rubocop-ast (>= 0.0.3, < 1.0)
59
+ rubocop-ast (>= 1.1.1)
60
60
  ruby-progressbar (~> 1.7)
61
61
  unicode-display_width (>= 1.4.0, < 2.0)
62
- rubocop-ast (0.1.0)
63
- parser (>= 2.7.0.1)
64
- rubocop-performance (1.6.1)
65
- rubocop (>= 0.71.0)
62
+ rubocop-ast (1.3.0)
63
+ parser (>= 2.7.1.5)
64
+ rubocop-performance (1.9.1)
65
+ rubocop (>= 0.90.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
66
67
  ruby-progressbar (1.10.1)
67
68
  unicode-display_width (1.7.0)
68
69
 
@@ -72,10 +73,9 @@ PLATFORMS
72
73
  DEPENDENCIES
73
74
  bundler
74
75
  feedx!
75
- google-protobuf (>= 3.7.0.pre.rc2)
76
76
  pbio
77
77
  rake
78
- red-parquet
78
+ red-parquet (>= 1.0.0)
79
79
  rspec
80
80
  rubocop
81
81
  rubocop-performance
data/Makefile CHANGED
@@ -1,12 +1,12 @@
1
- default: vet test
1
+ default: test
2
2
 
3
- test:
4
- go test ./...
3
+ .common.makefile:
4
+ curl -fsSL -o $@ https://gitlab.com/bsm/misc/raw/master/make/go/common.makefile
5
5
 
6
- vet:
7
- go vet ./...
6
+ include .common.makefile
8
7
 
9
8
  proto: internal/testdata/testdata.pb.go
10
9
 
11
10
  %.pb.go: %.proto
12
- protoc -I=. --gogo_out=paths=source_relative:. $<
11
+ # may need to `go install google.golang.org/protobuf/cmd/protoc-gen-go`
12
+ protoc -I=. --go_out=paths=source_relative:. $<
@@ -1,6 +1,7 @@
1
1
  package feedx
2
2
 
3
3
  import (
4
+ "compress/flate"
4
5
  "compress/gzip"
5
6
  "io"
6
7
  "path"
@@ -20,6 +21,8 @@ func DetectCompression(name string) Compression {
20
21
  ext := path.Ext(path.Base(name))
21
22
  if ext != "" && ext[0] == '.' && ext[len(ext)-1] == 'z' {
22
23
  return GZipCompression
24
+ } else if ext == ".flate" {
25
+ return FlateCompression
23
26
  }
24
27
  }
25
28
  return NoCompression
@@ -61,3 +64,18 @@ func (gzipCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
61
64
  func (gzipCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
62
65
  return gzip.NewWriter(w), nil
63
66
  }
67
+
68
+ // --------------------------------------------------------------------
69
+
70
+ // FlateCompression supports flate compression format.
71
+ var FlateCompression = flateCompression{}
72
+
73
+ type flateCompression struct{}
74
+
75
+ func (flateCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
76
+ return flate.NewReader(r), nil
77
+ }
78
+
79
+ func (flateCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
80
+ return flate.NewWriter(w, flate.BestSpeed)
81
+ }
@@ -41,6 +41,9 @@ var _ = Describe("Compression", func() {
41
41
  Expect(feedx.DetectCompression("/path/to/file.pb.gz")).To(Equal(feedx.GZipCompression))
42
42
  Expect(feedx.DetectCompression("/path/to/file.pbz")).To(Equal(feedx.GZipCompression))
43
43
 
44
+ Expect(feedx.DetectCompression("/path/to/file.flate")).To(Equal(feedx.FlateCompression))
45
+ Expect(feedx.DetectCompression("/path/to/file.whatever.flate")).To(Equal(feedx.FlateCompression))
46
+
44
47
  Expect(feedx.DetectCompression("")).To(Equal(feedx.NoCompression))
45
48
  Expect(feedx.DetectCompression("/path/to/file")).To(Equal(feedx.NoCompression))
46
49
  Expect(feedx.DetectCompression("/path/to/file.txt")).To(Equal(feedx.NoCompression))
@@ -63,4 +66,13 @@ var _ = Describe("Compression", func() {
63
66
  runSharedTest(subject)
64
67
  })
65
68
  })
69
+
70
+ Describe("FlateCompression", func() {
71
+ var subject = feedx.FlateCompression
72
+ var _ feedx.Compression = subject
73
+
74
+ It("should write/read", func() {
75
+ runSharedTest(subject)
76
+ })
77
+ })
66
78
  })
@@ -0,0 +1,170 @@
1
+ package parquet
2
+
3
+ import (
4
+ "encoding/binary"
5
+ "fmt"
6
+ "io"
7
+ "reflect"
8
+ "time"
9
+
10
+ kpq "github.com/bsm/parquet-go/parquet"
11
+ )
12
+
13
+ type decoder struct {
14
+ cols []*columnReader
15
+ closers []io.Closer
16
+ }
17
+
18
+ func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
19
+ file, err := kpq.FileFromReader(rs)
20
+ if err != nil {
21
+ return nil, err
22
+ }
23
+
24
+ // normalise column names
25
+ if len(names) == 0 {
26
+ for _, c := range file.Schema.Columns() {
27
+ names = append(names, c.String())
28
+ }
29
+ }
30
+
31
+ // normalise batch size
32
+ if batchSize < 1 {
33
+ batchSize = 1000
34
+ }
35
+
36
+ // initialise column buffers
37
+ cols := make([]*columnReader, 0, len(names))
38
+ for _, name := range names {
39
+ col, ok := file.Schema.ColumnByName(name)
40
+ if !ok {
41
+ _ = file.Close()
42
+ return nil, fmt.Errorf("column %q does not exist", name)
43
+ }
44
+ cols = append(cols, newColumnReader(file, col, batchSize))
45
+ }
46
+
47
+ return &decoder{cols: cols, closers: []io.Closer{file}}, nil
48
+ }
49
+
50
+ func (w *decoder) Decode(v interface{}) error {
51
+ rv := reflect.ValueOf(v)
52
+ rt := rv.Type()
53
+ if rt.Kind() != reflect.Ptr {
54
+ return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
55
+ }
56
+
57
+ // field index by name
58
+ fidx := cachedTypeFields(rt.Elem())
59
+ elem := rv.Elem()
60
+
61
+ for _, r := range w.cols {
62
+ // next column value
63
+ val, err := r.Next()
64
+ if err != nil {
65
+ return err
66
+ }
67
+
68
+ // skip if value is NULL
69
+ if val == nil {
70
+ continue
71
+ }
72
+
73
+ // set field if exists
74
+ if fi, ok := fidx[r.Name()]; ok {
75
+ fv := elem.Field(fi)
76
+ if ok := setValue(fv, val); !ok {
77
+ return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
78
+ }
79
+ }
80
+ }
81
+
82
+ return nil
83
+ }
84
+
85
+ func (w *decoder) Close() (err error) {
86
+ for _, c := range w.closers {
87
+ if e := c.Close(); e != nil {
88
+ err = e
89
+ }
90
+ }
91
+ return
92
+ }
93
+
94
+ // --------------------------------------------------------------------
95
+
96
+ func setValue(rv reflect.Value, v interface{}) bool {
97
+ if rv.Kind() == reflect.Ptr {
98
+ if rv.IsNil() {
99
+ if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
100
+ rv.Set(ev)
101
+ return true
102
+ }
103
+ return false
104
+ }
105
+ return setValue(rv.Elem(), v)
106
+ }
107
+
108
+ switch vv := v.(type) {
109
+ case bool:
110
+ switch rv.Kind() {
111
+ case reflect.Bool:
112
+ rv.SetBool(vv)
113
+ return true
114
+ }
115
+ case []byte:
116
+ switch rv.Kind() {
117
+ case reflect.String:
118
+ rv.SetString(string(vv))
119
+ return true
120
+ case reflect.Slice:
121
+ if rv.Type() == byteSliceType {
122
+ rv.SetBytes(vv)
123
+ return true
124
+ }
125
+ }
126
+ case int, int8, int16, int32, int64:
127
+ switch rv.Kind() {
128
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
129
+ rv.SetInt(reflect.ValueOf(v).Int())
130
+ return true
131
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
132
+ rv.SetUint(uint64(reflect.ValueOf(v).Int()))
133
+ return true
134
+ }
135
+ case uint, uint8, uint16, uint32, uint64:
136
+ switch rv.Kind() {
137
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
138
+ rv.SetInt(int64(reflect.ValueOf(v).Uint()))
139
+ return true
140
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
141
+ rv.SetUint(reflect.ValueOf(v).Uint())
142
+ return true
143
+ }
144
+ case float32, float64:
145
+ switch rv.Kind() {
146
+ case reflect.Float32, reflect.Float64:
147
+ rv.SetFloat(reflect.ValueOf(v).Float())
148
+ return true
149
+ }
150
+ case kpq.Int96:
151
+ if rt := rv.Type(); rt == timeType {
152
+ ns := int64(binary.LittleEndian.Uint64(vv[:8]))
153
+ jd := int64(binary.LittleEndian.Uint32(vv[8:]))
154
+ ts := time.Unix((jd-2440588)*86400, ns)
155
+ rv.Set(reflect.ValueOf(ts))
156
+ return true
157
+ } else if rt == int96Type {
158
+ rv.Set(reflect.ValueOf(v))
159
+ return true
160
+ }
161
+ }
162
+
163
+ return false
164
+ }
165
+
166
+ var (
167
+ byteSliceType = reflect.TypeOf(([]byte)(nil))
168
+ int96Type = reflect.TypeOf(kpq.Int96{})
169
+ timeType = reflect.TypeOf(time.Time{})
170
+ )
@@ -0,0 +1,88 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "bytes"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+ "time"
9
+
10
+ "github.com/bsm/feedx"
11
+ "github.com/bsm/feedx/ext/parquet"
12
+ . "github.com/onsi/ginkgo"
13
+ . "github.com/onsi/gomega"
14
+ )
15
+
16
+ var _ = Describe("Decoder", func() {
17
+ var subject feedx.FormatDecoder
18
+ var fixture *os.File
19
+
20
+ f32ptr := func(f float32) *float32 { return &f }
21
+
22
+ BeforeEach(func() {
23
+ var err error
24
+ fixture, err = os.Open("testdata/alltypes_plain.parquet")
25
+ Expect(err).NotTo(HaveOccurred())
26
+
27
+ format := &parquet.Format{BatchSize: 3}
28
+ subject, err = format.NewDecoder(fixture)
29
+ Expect(err).NotTo(HaveOccurred())
30
+ })
31
+
32
+ AfterEach(func() {
33
+ Expect(subject.Close()).To(Succeed())
34
+ Expect(fixture.Close()).To(Succeed())
35
+ })
36
+
37
+ It("should decode", func() {
38
+ v1 := new(mockStruct)
39
+ Expect(subject.Decode(v1)).To(Succeed())
40
+ Expect(v1).To(Equal(&mockStruct{
41
+ ID: 4,
42
+ Bool: true,
43
+ Float: f32ptr(0),
44
+ DateString: "03/01/09", ByteString: []byte("0"),
45
+ Timestamp: time.Unix(1235865600, 0),
46
+ }))
47
+
48
+ v2 := new(mockStruct)
49
+ Expect(subject.Decode(v2)).To(Succeed())
50
+ Expect(v2).To(Equal(&mockStruct{
51
+ ID: 5,
52
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
53
+ Float: f32ptr(1.1), Double: 10.1,
54
+ DateString: "03/01/09", ByteString: []byte("1"),
55
+ Timestamp: time.Unix(1235865660, 0),
56
+ }))
57
+
58
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
59
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
60
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
61
+
62
+ v6 := new(mockStruct)
63
+ Expect(subject.Decode(v6)).To(Succeed())
64
+ Expect(v6).To(Equal(&mockStruct{
65
+ ID: 3,
66
+ Bool: false,
67
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
68
+ Float: f32ptr(1.1), Double: 10.1,
69
+ DateString: "02/01/09", ByteString: []byte("1"),
70
+ Timestamp: time.Unix(1233446460, 0),
71
+ }))
72
+
73
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
74
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
75
+
76
+ v9 := new(mockStruct)
77
+ Expect(subject.Decode(v9)).To(MatchError(io.EOF))
78
+ })
79
+
80
+ It("should open from non-file readers", func() {
81
+ bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
82
+ Expect(err).NotTo(HaveOccurred())
83
+
84
+ dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
85
+ Expect(err).NotTo(HaveOccurred())
86
+ Expect(dec.Close()).To(Succeed())
87
+ })
88
+ })