feedx 0.12.2 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 898f80c359182952ab192f95fb05dc1e6ae806f0d661d71244813b779595d0f8
4
- data.tar.gz: 43778c6084131e8ffa9ba59b7836fe198f7595e5dc07af1c4e11a9b03ac49419
3
+ metadata.gz: c600f733aa3a0663939488c86cfd1d29abb4dde277ceb582a3e3d35c970830fd
4
+ data.tar.gz: 4ab6bbe4a53e31c4c2c408670f82a72fceb63128c0a22c7f57d0bf8c0eca95d2
5
5
  SHA512:
6
- metadata.gz: fa8f2e4e26c140bb4c2732655ac73c39ef20055f215ac39c8c62bd77e7cf27d7d8140287ed6c90a4c4da6bf461d491123f76784ce5d770189a45da7fea897965
7
- data.tar.gz: a35d5296c5aec452b450e95985535c98b6b99f1a872624b3ad3974cbb0f56b0e6d0ae8395edd849c97242ae0a77ee77537404cb3dce22882a3a310a6e0f04a04
6
+ metadata.gz: b3e1ef899a5dfafabad3eb45ebb56c672fc9348e52455e04c054967ea3e75b15e2cba3b74449868104204fa3bbf9d4bd4ff3e8c4b1e4db74cc8349982359e1f6
7
+ data.tar.gz: 3722d116710b5b16315af4fa7ac7e052f9c87efd1531e18d15000bc747119f7dececdd9bae02cac12b9dd363b00d033f710bf9e169986f3a036aab54194e64ef
@@ -7,3 +7,6 @@ end_of_line = lf
7
7
  charset = utf-8
8
8
  trim_trailing_whitespace = true
9
9
  insert_final_newline = true
10
+
11
+ [{*.go,Makefile}]
12
+ indent_style = tab
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
1
  .rubocop-*
2
2
  pkg/
3
3
  *~
4
+ *.makefile
@@ -18,7 +18,7 @@ matrix:
18
18
  - sudo apt install -y libarrow-dev libarrow-glib-dev libarrow-dataset-dev libplasma-dev libplasma-glib-dev libgandiva-dev libgandiva-glib-dev libparquet-dev libparquet-glib-dev
19
19
  - language: go
20
20
  go:
21
- - 1.14.x
21
+ - 1.15.x
22
22
  - language: go
23
23
  go:
24
- - 1.13.x
24
+ - 1.14.x
data/Gemfile CHANGED
@@ -1,4 +1,2 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
-
4
- gem 'google-protobuf', '>= 3.7.0-rc2'
@@ -1,16 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feedx (0.12.2)
5
- bfs (>= 0.5.0)
4
+ feedx (0.12.3)
5
+ bfs (>= 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
10
  ast (2.4.1)
11
- bfs (0.7.2)
11
+ bfs (0.8.0)
12
12
  diff-lcs (1.4.4)
13
- extpp (0.0.8)
13
+ extpp (0.0.9)
14
14
  gio2 (3.4.3)
15
15
  gobject-introspection (= 3.4.3)
16
16
  glib2 (3.4.3)
@@ -18,51 +18,52 @@ GEM
18
18
  pkg-config (>= 1.3.5)
19
19
  gobject-introspection (3.4.3)
20
20
  glib2 (= 3.4.3)
21
- google-protobuf (3.12.2)
21
+ google-protobuf (3.14.0)
22
22
  native-package-installer (1.0.9)
23
- parallel (1.19.2)
24
- parser (2.7.1.4)
23
+ parallel (1.20.1)
24
+ parser (2.7.2.0)
25
25
  ast (~> 2.4.1)
26
- pbio (0.2.1)
26
+ pbio (0.2.2)
27
27
  google-protobuf
28
- pkg-config (1.4.1)
28
+ pkg-config (1.4.4)
29
29
  rainbow (3.0.0)
30
30
  rake (13.0.1)
31
- red-arrow (0.17.1)
31
+ red-arrow (2.0.0)
32
32
  extpp (>= 0.0.7)
33
33
  gio2 (>= 3.3.6)
34
34
  native-package-installer
35
35
  pkg-config
36
- red-parquet (0.17.1)
37
- red-arrow (= 0.17.1)
38
- regexp_parser (1.7.1)
36
+ red-parquet (2.0.0)
37
+ red-arrow (= 2.0.0)
38
+ regexp_parser (2.0.0)
39
39
  rexml (3.2.4)
40
- rspec (3.9.0)
41
- rspec-core (~> 3.9.0)
42
- rspec-expectations (~> 3.9.0)
43
- rspec-mocks (~> 3.9.0)
44
- rspec-core (3.9.2)
45
- rspec-support (~> 3.9.3)
46
- rspec-expectations (3.9.2)
40
+ rspec (3.10.0)
41
+ rspec-core (~> 3.10.0)
42
+ rspec-expectations (~> 3.10.0)
43
+ rspec-mocks (~> 3.10.0)
44
+ rspec-core (3.10.0)
45
+ rspec-support (~> 3.10.0)
46
+ rspec-expectations (3.10.0)
47
47
  diff-lcs (>= 1.2.0, < 2.0)
48
- rspec-support (~> 3.9.0)
49
- rspec-mocks (3.9.1)
48
+ rspec-support (~> 3.10.0)
49
+ rspec-mocks (3.10.0)
50
50
  diff-lcs (>= 1.2.0, < 2.0)
51
- rspec-support (~> 3.9.0)
52
- rspec-support (3.9.3)
53
- rubocop (0.86.0)
51
+ rspec-support (~> 3.10.0)
52
+ rspec-support (3.10.0)
53
+ rubocop (1.4.2)
54
54
  parallel (~> 1.10)
55
- parser (>= 2.7.0.1)
55
+ parser (>= 2.7.1.5)
56
56
  rainbow (>= 2.2.2, < 4.0)
57
- regexp_parser (>= 1.7)
57
+ regexp_parser (>= 1.8)
58
58
  rexml
59
- rubocop-ast (>= 0.0.3, < 1.0)
59
+ rubocop-ast (>= 1.1.1)
60
60
  ruby-progressbar (~> 1.7)
61
61
  unicode-display_width (>= 1.4.0, < 2.0)
62
- rubocop-ast (0.1.0)
63
- parser (>= 2.7.0.1)
64
- rubocop-performance (1.6.1)
65
- rubocop (>= 0.71.0)
62
+ rubocop-ast (1.3.0)
63
+ parser (>= 2.7.1.5)
64
+ rubocop-performance (1.9.1)
65
+ rubocop (>= 0.90.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
66
67
  ruby-progressbar (1.10.1)
67
68
  unicode-display_width (1.7.0)
68
69
 
@@ -72,10 +73,9 @@ PLATFORMS
72
73
  DEPENDENCIES
73
74
  bundler
74
75
  feedx!
75
- google-protobuf (>= 3.7.0.pre.rc2)
76
76
  pbio
77
77
  rake
78
- red-parquet
78
+ red-parquet (>= 1.0.0)
79
79
  rspec
80
80
  rubocop
81
81
  rubocop-performance
data/Makefile CHANGED
@@ -1,12 +1,12 @@
1
- default: vet test
1
+ default: test
2
2
 
3
- test:
4
- go test ./...
3
+ .common.makefile:
4
+ curl -fsSL -o $@ https://gitlab.com/bsm/misc/raw/master/make/go/common.makefile
5
5
 
6
- vet:
7
- go vet ./...
6
+ include .common.makefile
8
7
 
9
8
  proto: internal/testdata/testdata.pb.go
10
9
 
11
10
  %.pb.go: %.proto
12
- protoc -I=. --gogo_out=paths=source_relative:. $<
11
+ # may need to `go install google.golang.org/protobuf/cmd/protoc-gen-go`
12
+ protoc -I=. --go_out=paths=source_relative:. $<
@@ -1,6 +1,7 @@
1
1
  package feedx
2
2
 
3
3
  import (
4
+ "compress/flate"
4
5
  "compress/gzip"
5
6
  "io"
6
7
  "path"
@@ -20,6 +21,8 @@ func DetectCompression(name string) Compression {
20
21
  ext := path.Ext(path.Base(name))
21
22
  if ext != "" && ext[0] == '.' && ext[len(ext)-1] == 'z' {
22
23
  return GZipCompression
24
+ } else if ext == ".flate" {
25
+ return FlateCompression
23
26
  }
24
27
  }
25
28
  return NoCompression
@@ -61,3 +64,18 @@ func (gzipCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
61
64
  func (gzipCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
62
65
  return gzip.NewWriter(w), nil
63
66
  }
67
+
68
+ // --------------------------------------------------------------------
69
+
70
+ // FlateCompression supports flate compression format.
71
+ var FlateCompression = flateCompression{}
72
+
73
+ type flateCompression struct{}
74
+
75
+ func (flateCompression) NewReader(r io.Reader) (io.ReadCloser, error) {
76
+ return flate.NewReader(r), nil
77
+ }
78
+
79
+ func (flateCompression) NewWriter(w io.Writer) (io.WriteCloser, error) {
80
+ return flate.NewWriter(w, flate.BestSpeed)
81
+ }
@@ -41,6 +41,9 @@ var _ = Describe("Compression", func() {
41
41
  Expect(feedx.DetectCompression("/path/to/file.pb.gz")).To(Equal(feedx.GZipCompression))
42
42
  Expect(feedx.DetectCompression("/path/to/file.pbz")).To(Equal(feedx.GZipCompression))
43
43
 
44
+ Expect(feedx.DetectCompression("/path/to/file.flate")).To(Equal(feedx.FlateCompression))
45
+ Expect(feedx.DetectCompression("/path/to/file.whatever.flate")).To(Equal(feedx.FlateCompression))
46
+
44
47
  Expect(feedx.DetectCompression("")).To(Equal(feedx.NoCompression))
45
48
  Expect(feedx.DetectCompression("/path/to/file")).To(Equal(feedx.NoCompression))
46
49
  Expect(feedx.DetectCompression("/path/to/file.txt")).To(Equal(feedx.NoCompression))
@@ -63,4 +66,13 @@ var _ = Describe("Compression", func() {
63
66
  runSharedTest(subject)
64
67
  })
65
68
  })
69
+
70
+ Describe("FlateCompression", func() {
71
+ var subject = feedx.FlateCompression
72
+ var _ feedx.Compression = subject
73
+
74
+ It("should write/read", func() {
75
+ runSharedTest(subject)
76
+ })
77
+ })
66
78
  })
@@ -0,0 +1,170 @@
1
+ package parquet
2
+
3
+ import (
4
+ "encoding/binary"
5
+ "fmt"
6
+ "io"
7
+ "reflect"
8
+ "time"
9
+
10
+ kpq "github.com/bsm/parquet-go/parquet"
11
+ )
12
+
13
+ type decoder struct {
14
+ cols []*columnReader
15
+ closers []io.Closer
16
+ }
17
+
18
+ func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
19
+ file, err := kpq.FileFromReader(rs)
20
+ if err != nil {
21
+ return nil, err
22
+ }
23
+
24
+ // normalise column names
25
+ if len(names) == 0 {
26
+ for _, c := range file.Schema.Columns() {
27
+ names = append(names, c.String())
28
+ }
29
+ }
30
+
31
+ // normalise batch size
32
+ if batchSize < 1 {
33
+ batchSize = 1000
34
+ }
35
+
36
+ // initialise column buffers
37
+ cols := make([]*columnReader, 0, len(names))
38
+ for _, name := range names {
39
+ col, ok := file.Schema.ColumnByName(name)
40
+ if !ok {
41
+ _ = file.Close()
42
+ return nil, fmt.Errorf("column %q does not exist", name)
43
+ }
44
+ cols = append(cols, newColumnReader(file, col, batchSize))
45
+ }
46
+
47
+ return &decoder{cols: cols, closers: []io.Closer{file}}, nil
48
+ }
49
+
50
+ func (w *decoder) Decode(v interface{}) error {
51
+ rv := reflect.ValueOf(v)
52
+ rt := rv.Type()
53
+ if rt.Kind() != reflect.Ptr {
54
+ return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
55
+ }
56
+
57
+ // field index by name
58
+ fidx := cachedTypeFields(rt.Elem())
59
+ elem := rv.Elem()
60
+
61
+ for _, r := range w.cols {
62
+ // next column value
63
+ val, err := r.Next()
64
+ if err != nil {
65
+ return err
66
+ }
67
+
68
+ // skip if value is NULL
69
+ if val == nil {
70
+ continue
71
+ }
72
+
73
+ // set field if exists
74
+ if fi, ok := fidx[r.Name()]; ok {
75
+ fv := elem.Field(fi)
76
+ if ok := setValue(fv, val); !ok {
77
+ return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
78
+ }
79
+ }
80
+ }
81
+
82
+ return nil
83
+ }
84
+
85
+ func (w *decoder) Close() (err error) {
86
+ for _, c := range w.closers {
87
+ if e := c.Close(); e != nil {
88
+ err = e
89
+ }
90
+ }
91
+ return
92
+ }
93
+
94
+ // --------------------------------------------------------------------
95
+
96
+ func setValue(rv reflect.Value, v interface{}) bool {
97
+ if rv.Kind() == reflect.Ptr {
98
+ if rv.IsNil() {
99
+ if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
100
+ rv.Set(ev)
101
+ return true
102
+ }
103
+ return false
104
+ }
105
+ return setValue(rv.Elem(), v)
106
+ }
107
+
108
+ switch vv := v.(type) {
109
+ case bool:
110
+ switch rv.Kind() {
111
+ case reflect.Bool:
112
+ rv.SetBool(vv)
113
+ return true
114
+ }
115
+ case []byte:
116
+ switch rv.Kind() {
117
+ case reflect.String:
118
+ rv.SetString(string(vv))
119
+ return true
120
+ case reflect.Slice:
121
+ if rv.Type() == byteSliceType {
122
+ rv.SetBytes(vv)
123
+ return true
124
+ }
125
+ }
126
+ case int, int8, int16, int32, int64:
127
+ switch rv.Kind() {
128
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
129
+ rv.SetInt(reflect.ValueOf(v).Int())
130
+ return true
131
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
132
+ rv.SetUint(uint64(reflect.ValueOf(v).Int()))
133
+ return true
134
+ }
135
+ case uint, uint8, uint16, uint32, uint64:
136
+ switch rv.Kind() {
137
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
138
+ rv.SetInt(int64(reflect.ValueOf(v).Uint()))
139
+ return true
140
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
141
+ rv.SetUint(reflect.ValueOf(v).Uint())
142
+ return true
143
+ }
144
+ case float32, float64:
145
+ switch rv.Kind() {
146
+ case reflect.Float32, reflect.Float64:
147
+ rv.SetFloat(reflect.ValueOf(v).Float())
148
+ return true
149
+ }
150
+ case kpq.Int96:
151
+ if rt := rv.Type(); rt == timeType {
152
+ ns := int64(binary.LittleEndian.Uint64(vv[:8]))
153
+ jd := int64(binary.LittleEndian.Uint32(vv[8:]))
154
+ ts := time.Unix((jd-2440588)*86400, ns)
155
+ rv.Set(reflect.ValueOf(ts))
156
+ return true
157
+ } else if rt == int96Type {
158
+ rv.Set(reflect.ValueOf(v))
159
+ return true
160
+ }
161
+ }
162
+
163
+ return false
164
+ }
165
+
166
+ var (
167
+ byteSliceType = reflect.TypeOf(([]byte)(nil))
168
+ int96Type = reflect.TypeOf(kpq.Int96{})
169
+ timeType = reflect.TypeOf(time.Time{})
170
+ )
@@ -0,0 +1,88 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "bytes"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+ "time"
9
+
10
+ "github.com/bsm/feedx"
11
+ "github.com/bsm/feedx/ext/parquet"
12
+ . "github.com/onsi/ginkgo"
13
+ . "github.com/onsi/gomega"
14
+ )
15
+
16
+ var _ = Describe("Decoder", func() {
17
+ var subject feedx.FormatDecoder
18
+ var fixture *os.File
19
+
20
+ f32ptr := func(f float32) *float32 { return &f }
21
+
22
+ BeforeEach(func() {
23
+ var err error
24
+ fixture, err = os.Open("testdata/alltypes_plain.parquet")
25
+ Expect(err).NotTo(HaveOccurred())
26
+
27
+ format := &parquet.Format{BatchSize: 3}
28
+ subject, err = format.NewDecoder(fixture)
29
+ Expect(err).NotTo(HaveOccurred())
30
+ })
31
+
32
+ AfterEach(func() {
33
+ Expect(subject.Close()).To(Succeed())
34
+ Expect(fixture.Close()).To(Succeed())
35
+ })
36
+
37
+ It("should decode", func() {
38
+ v1 := new(mockStruct)
39
+ Expect(subject.Decode(v1)).To(Succeed())
40
+ Expect(v1).To(Equal(&mockStruct{
41
+ ID: 4,
42
+ Bool: true,
43
+ Float: f32ptr(0),
44
+ DateString: "03/01/09", ByteString: []byte("0"),
45
+ Timestamp: time.Unix(1235865600, 0),
46
+ }))
47
+
48
+ v2 := new(mockStruct)
49
+ Expect(subject.Decode(v2)).To(Succeed())
50
+ Expect(v2).To(Equal(&mockStruct{
51
+ ID: 5,
52
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
53
+ Float: f32ptr(1.1), Double: 10.1,
54
+ DateString: "03/01/09", ByteString: []byte("1"),
55
+ Timestamp: time.Unix(1235865660, 0),
56
+ }))
57
+
58
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
59
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
60
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
61
+
62
+ v6 := new(mockStruct)
63
+ Expect(subject.Decode(v6)).To(Succeed())
64
+ Expect(v6).To(Equal(&mockStruct{
65
+ ID: 3,
66
+ Bool: false,
67
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
68
+ Float: f32ptr(1.1), Double: 10.1,
69
+ DateString: "02/01/09", ByteString: []byte("1"),
70
+ Timestamp: time.Unix(1233446460, 0),
71
+ }))
72
+
73
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
74
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
75
+
76
+ v9 := new(mockStruct)
77
+ Expect(subject.Decode(v9)).To(MatchError(io.EOF))
78
+ })
79
+
80
+ It("should open from non-file readers", func() {
81
+ bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
82
+ Expect(err).NotTo(HaveOccurred())
83
+
84
+ dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
85
+ Expect(err).NotTo(HaveOccurred())
86
+ Expect(dec.Close()).To(Succeed())
87
+ })
88
+ })