feedx 0.12.0 → 0.12.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +3 -0
- data/.github/workflows/test.yml +60 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +14 -5
- data/Gemfile +0 -2
- data/Gemfile.lock +60 -49
- data/Makefile +6 -6
- data/README.md +1 -1
- data/compression.go +18 -0
- data/compression_test.go +17 -5
- data/consumer.go +12 -3
- data/consumer_test.go +50 -19
- data/ext/parquet/decoder.go +170 -0
- data/ext/parquet/decoder_test.go +88 -0
- data/ext/parquet/go.mod +10 -0
- data/ext/parquet/go.sum +154 -0
- data/ext/parquet/parquet.go +78 -0
- data/ext/parquet/parquet_test.go +28 -0
- data/ext/parquet/reader.go +89 -0
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +51 -0
- data/feedx.gemspec +5 -6
- data/feedx_ext_test.go +6 -0
- data/feedx_test.go +6 -6
- data/format.go +45 -15
- data/format_test.go +7 -5
- data/go.mod +10 -5
- data/go.sum +90 -25
- data/internal/testdata/testdata.pb.go +176 -77
- data/lib/feedx/cache/memory.rb +1 -0
- data/lib/feedx/consumer.rb +9 -6
- data/lib/feedx/format.rb +1 -1
- data/lib/feedx/producer.rb +20 -18
- data/lib/feedx/stream.rb +24 -8
- data/producer_test.go +4 -4
- data/reader_test.go +6 -5
- data/spec/feedx/cache/memory_spec.rb +2 -2
- data/spec/feedx/cache/value_spec.rb +1 -1
- data/spec/feedx/compression/gzip_spec.rb +1 -1
- data/spec/feedx/compression/none_spec.rb +1 -1
- data/spec/feedx/compression_spec.rb +2 -2
- data/spec/feedx/consumer_spec.rb +5 -4
- data/spec/feedx/format/abstract_spec.rb +2 -1
- data/spec/feedx/format/json_spec.rb +6 -6
- data/spec/feedx/format/parquet_spec.rb +1 -1
- data/spec/feedx/format/protobuf_spec.rb +1 -1
- data/spec/feedx/format_spec.rb +2 -2
- data/spec/feedx/producer_spec.rb +10 -9
- data/spec/feedx/stream_spec.rb +36 -18
- data/writer.go +1 -4
- data/writer_test.go +8 -8
- metadata +25 -23
- data/.travis.yml +0 -24
@@ -0,0 +1,170 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"encoding/binary"
|
5
|
+
"fmt"
|
6
|
+
"io"
|
7
|
+
"reflect"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
kpq "github.com/bsm/parquet-go/parquet"
|
11
|
+
)
|
12
|
+
|
13
|
+
type decoder struct {
|
14
|
+
cols []*columnReader
|
15
|
+
closers []io.Closer
|
16
|
+
}
|
17
|
+
|
18
|
+
func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
|
19
|
+
file, err := kpq.FileFromReader(rs)
|
20
|
+
if err != nil {
|
21
|
+
return nil, err
|
22
|
+
}
|
23
|
+
|
24
|
+
// normalise column names
|
25
|
+
if len(names) == 0 {
|
26
|
+
for _, c := range file.Schema.Columns() {
|
27
|
+
names = append(names, c.String())
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
// normalise batch size
|
32
|
+
if batchSize < 1 {
|
33
|
+
batchSize = 1000
|
34
|
+
}
|
35
|
+
|
36
|
+
// initialise column buffers
|
37
|
+
cols := make([]*columnReader, 0, len(names))
|
38
|
+
for _, name := range names {
|
39
|
+
col, ok := file.Schema.ColumnByName(name)
|
40
|
+
if !ok {
|
41
|
+
_ = file.Close()
|
42
|
+
return nil, fmt.Errorf("column %q does not exist", name)
|
43
|
+
}
|
44
|
+
cols = append(cols, newColumnReader(file, col, batchSize))
|
45
|
+
}
|
46
|
+
|
47
|
+
return &decoder{cols: cols, closers: []io.Closer{file}}, nil
|
48
|
+
}
|
49
|
+
|
50
|
+
func (w *decoder) Decode(v interface{}) error {
|
51
|
+
rv := reflect.ValueOf(v)
|
52
|
+
rt := rv.Type()
|
53
|
+
if rt.Kind() != reflect.Ptr {
|
54
|
+
return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
|
55
|
+
}
|
56
|
+
|
57
|
+
// field index by name
|
58
|
+
fidx := cachedTypeFields(rt.Elem())
|
59
|
+
elem := rv.Elem()
|
60
|
+
|
61
|
+
for _, r := range w.cols {
|
62
|
+
// next column value
|
63
|
+
val, err := r.Next()
|
64
|
+
if err != nil {
|
65
|
+
return err
|
66
|
+
}
|
67
|
+
|
68
|
+
// skip if value is NULL
|
69
|
+
if val == nil {
|
70
|
+
continue
|
71
|
+
}
|
72
|
+
|
73
|
+
// set field if exists
|
74
|
+
if fi, ok := fidx[r.Name()]; ok {
|
75
|
+
fv := elem.Field(fi)
|
76
|
+
if ok := setValue(fv, val); !ok {
|
77
|
+
return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
return nil
|
83
|
+
}
|
84
|
+
|
85
|
+
func (w *decoder) Close() (err error) {
|
86
|
+
for _, c := range w.closers {
|
87
|
+
if e := c.Close(); e != nil {
|
88
|
+
err = e
|
89
|
+
}
|
90
|
+
}
|
91
|
+
return
|
92
|
+
}
|
93
|
+
|
94
|
+
// --------------------------------------------------------------------
|
95
|
+
|
96
|
+
func setValue(rv reflect.Value, v interface{}) bool {
|
97
|
+
if rv.Kind() == reflect.Ptr {
|
98
|
+
if rv.IsNil() {
|
99
|
+
if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
|
100
|
+
rv.Set(ev)
|
101
|
+
return true
|
102
|
+
}
|
103
|
+
return false
|
104
|
+
}
|
105
|
+
return setValue(rv.Elem(), v)
|
106
|
+
}
|
107
|
+
|
108
|
+
switch vv := v.(type) {
|
109
|
+
case bool:
|
110
|
+
switch rv.Kind() {
|
111
|
+
case reflect.Bool:
|
112
|
+
rv.SetBool(vv)
|
113
|
+
return true
|
114
|
+
}
|
115
|
+
case []byte:
|
116
|
+
switch rv.Kind() {
|
117
|
+
case reflect.String:
|
118
|
+
rv.SetString(string(vv))
|
119
|
+
return true
|
120
|
+
case reflect.Slice:
|
121
|
+
if rv.Type() == byteSliceType {
|
122
|
+
rv.SetBytes(vv)
|
123
|
+
return true
|
124
|
+
}
|
125
|
+
}
|
126
|
+
case int, int8, int16, int32, int64:
|
127
|
+
switch rv.Kind() {
|
128
|
+
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
129
|
+
rv.SetInt(reflect.ValueOf(v).Int())
|
130
|
+
return true
|
131
|
+
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
132
|
+
rv.SetUint(uint64(reflect.ValueOf(v).Int()))
|
133
|
+
return true
|
134
|
+
}
|
135
|
+
case uint, uint8, uint16, uint32, uint64:
|
136
|
+
switch rv.Kind() {
|
137
|
+
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
138
|
+
rv.SetInt(int64(reflect.ValueOf(v).Uint()))
|
139
|
+
return true
|
140
|
+
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
141
|
+
rv.SetUint(reflect.ValueOf(v).Uint())
|
142
|
+
return true
|
143
|
+
}
|
144
|
+
case float32, float64:
|
145
|
+
switch rv.Kind() {
|
146
|
+
case reflect.Float32, reflect.Float64:
|
147
|
+
rv.SetFloat(reflect.ValueOf(v).Float())
|
148
|
+
return true
|
149
|
+
}
|
150
|
+
case kpq.Int96:
|
151
|
+
if rt := rv.Type(); rt == timeType {
|
152
|
+
ns := int64(binary.LittleEndian.Uint64(vv[:8]))
|
153
|
+
jd := int64(binary.LittleEndian.Uint32(vv[8:]))
|
154
|
+
ts := time.Unix((jd-2440588)*86400, ns)
|
155
|
+
rv.Set(reflect.ValueOf(ts))
|
156
|
+
return true
|
157
|
+
} else if rt == int96Type {
|
158
|
+
rv.Set(reflect.ValueOf(v))
|
159
|
+
return true
|
160
|
+
}
|
161
|
+
}
|
162
|
+
|
163
|
+
return false
|
164
|
+
}
|
165
|
+
|
166
|
+
var (
|
167
|
+
byteSliceType = reflect.TypeOf(([]byte)(nil))
|
168
|
+
int96Type = reflect.TypeOf(kpq.Int96{})
|
169
|
+
timeType = reflect.TypeOf(time.Time{})
|
170
|
+
)
|
@@ -0,0 +1,88 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"bytes"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
"github.com/bsm/feedx"
|
11
|
+
"github.com/bsm/feedx/ext/parquet"
|
12
|
+
. "github.com/bsm/ginkgo"
|
13
|
+
. "github.com/bsm/gomega"
|
14
|
+
)
|
15
|
+
|
16
|
+
var _ = Describe("Decoder", func() {
|
17
|
+
var subject feedx.FormatDecoder
|
18
|
+
var fixture *os.File
|
19
|
+
|
20
|
+
f32ptr := func(f float32) *float32 { return &f }
|
21
|
+
|
22
|
+
BeforeEach(func() {
|
23
|
+
var err error
|
24
|
+
fixture, err = os.Open("testdata/alltypes_plain.parquet")
|
25
|
+
Expect(err).NotTo(HaveOccurred())
|
26
|
+
|
27
|
+
format := &parquet.Format{BatchSize: 3}
|
28
|
+
subject, err = format.NewDecoder(fixture)
|
29
|
+
Expect(err).NotTo(HaveOccurred())
|
30
|
+
})
|
31
|
+
|
32
|
+
AfterEach(func() {
|
33
|
+
Expect(subject.Close()).To(Succeed())
|
34
|
+
Expect(fixture.Close()).To(Succeed())
|
35
|
+
})
|
36
|
+
|
37
|
+
It("decodes", func() {
|
38
|
+
v1 := new(mockStruct)
|
39
|
+
Expect(subject.Decode(v1)).To(Succeed())
|
40
|
+
Expect(v1).To(Equal(&mockStruct{
|
41
|
+
ID: 4,
|
42
|
+
Bool: true,
|
43
|
+
Float: f32ptr(0),
|
44
|
+
DateString: "03/01/09", ByteString: []byte("0"),
|
45
|
+
Timestamp: time.Unix(1235865600, 0),
|
46
|
+
}))
|
47
|
+
|
48
|
+
v2 := new(mockStruct)
|
49
|
+
Expect(subject.Decode(v2)).To(Succeed())
|
50
|
+
Expect(v2).To(Equal(&mockStruct{
|
51
|
+
ID: 5,
|
52
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
53
|
+
Float: f32ptr(1.1), Double: 10.1,
|
54
|
+
DateString: "03/01/09", ByteString: []byte("1"),
|
55
|
+
Timestamp: time.Unix(1235865660, 0),
|
56
|
+
}))
|
57
|
+
|
58
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
|
59
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
|
60
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
|
61
|
+
|
62
|
+
v6 := new(mockStruct)
|
63
|
+
Expect(subject.Decode(v6)).To(Succeed())
|
64
|
+
Expect(v6).To(Equal(&mockStruct{
|
65
|
+
ID: 3,
|
66
|
+
Bool: false,
|
67
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
68
|
+
Float: f32ptr(1.1), Double: 10.1,
|
69
|
+
DateString: "02/01/09", ByteString: []byte("1"),
|
70
|
+
Timestamp: time.Unix(1233446460, 0),
|
71
|
+
}))
|
72
|
+
|
73
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
|
74
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
|
75
|
+
|
76
|
+
v9 := new(mockStruct)
|
77
|
+
Expect(subject.Decode(v9)).To(MatchError(io.EOF))
|
78
|
+
})
|
79
|
+
|
80
|
+
It("opens from non-file readers", func() {
|
81
|
+
bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
|
82
|
+
Expect(err).NotTo(HaveOccurred())
|
83
|
+
|
84
|
+
dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
|
85
|
+
Expect(err).NotTo(HaveOccurred())
|
86
|
+
Expect(dec.Close()).To(Succeed())
|
87
|
+
})
|
88
|
+
})
|
data/ext/parquet/go.mod
ADDED
data/ext/parquet/go.sum
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
2
|
+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
3
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0 h1:TQtVPlDnAYwcrVNB2JiGuMc++H5qzWZd9PhkNo5WyHI=
|
4
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0/go.mod h1:6PcTVMw80pCY1RVuoqu3V++99uQB3vsSYKPTd8AWA0k=
|
5
|
+
github.com/bsm/bfs v0.11.3 h1:BTFCftgmuVZwwu6vyjhyKr/Pg1E+cZ5tLodj3wKxr94=
|
6
|
+
github.com/bsm/bfs v0.11.3/go.mod h1:sUhBrbc9g0XThRRrT9hiinMhhKbkKIdhLkFljk4fuzM=
|
7
|
+
github.com/bsm/feedx v0.12.5 h1:N751MFTRKRgoP5eG1S30EoUuHsI7aAxJ0aQZg+7xf/Y=
|
8
|
+
github.com/bsm/feedx v0.12.5/go.mod h1:l5YNhFomuWy9du+8+hznXMH8Hug2qSMik7b5Vipcy58=
|
9
|
+
github.com/bsm/ginkgo v1.16.0/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
10
|
+
github.com/bsm/ginkgo v1.16.1 h1:jp1v1dbmbGZDWmnGXDTN+XK3U1fTTNja9xYa7VBI0l0=
|
11
|
+
github.com/bsm/ginkgo v1.16.1/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
12
|
+
github.com/bsm/gomega v1.11.0 h1:wg9DVGPETNZLIbMsseneMV1a7uo/x+wsCyNXdEcifDI=
|
13
|
+
github.com/bsm/gomega v1.11.0/go.mod h1:JifAceMQ4crZIWYUKrlGcmbN3bqHogVTADMD2ATsbwk=
|
14
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
|
15
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
|
16
|
+
github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
|
17
|
+
github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
|
18
|
+
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
19
|
+
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
20
|
+
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
21
|
+
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
22
|
+
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
23
|
+
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
|
24
|
+
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
25
|
+
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
26
|
+
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
27
|
+
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
28
|
+
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
29
|
+
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
30
|
+
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
31
|
+
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
32
|
+
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
33
|
+
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
34
|
+
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
35
|
+
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
36
|
+
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
37
|
+
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
38
|
+
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
|
39
|
+
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
40
|
+
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
41
|
+
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
42
|
+
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
43
|
+
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
44
|
+
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
45
|
+
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
46
|
+
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
47
|
+
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
48
|
+
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
49
|
+
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
50
|
+
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
51
|
+
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
52
|
+
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
53
|
+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
54
|
+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
55
|
+
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
56
|
+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
57
|
+
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
|
58
|
+
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
59
|
+
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
60
|
+
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
61
|
+
github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
|
62
|
+
github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
|
63
|
+
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
64
|
+
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
65
|
+
github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
|
66
|
+
github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
67
|
+
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
68
|
+
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
69
|
+
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
70
|
+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
71
|
+
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
72
|
+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
73
|
+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
74
|
+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
75
|
+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
76
|
+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
77
|
+
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
78
|
+
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
79
|
+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
80
|
+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
81
|
+
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
82
|
+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
83
|
+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
84
|
+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
85
|
+
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
86
|
+
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
87
|
+
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
88
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
|
89
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
90
|
+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
91
|
+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
92
|
+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
93
|
+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
94
|
+
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
95
|
+
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
96
|
+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
97
|
+
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
98
|
+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
99
|
+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
100
|
+
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
101
|
+
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
102
|
+
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
103
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
104
|
+
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
105
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
|
106
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
107
|
+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
108
|
+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
109
|
+
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
110
|
+
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
111
|
+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
112
|
+
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
113
|
+
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
114
|
+
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
115
|
+
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
116
|
+
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
117
|
+
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
118
|
+
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
119
|
+
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
120
|
+
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
121
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
122
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
123
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
124
|
+
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
125
|
+
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
126
|
+
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
127
|
+
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
128
|
+
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
129
|
+
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
130
|
+
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
131
|
+
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
132
|
+
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
133
|
+
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
134
|
+
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
135
|
+
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
136
|
+
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
137
|
+
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
138
|
+
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
139
|
+
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
140
|
+
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
141
|
+
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
142
|
+
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
|
143
|
+
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
144
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
145
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
146
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
147
|
+
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
148
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
149
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
150
|
+
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
151
|
+
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
152
|
+
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
153
|
+
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
154
|
+
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"fmt"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
|
9
|
+
"github.com/bsm/feedx"
|
10
|
+
)
|
11
|
+
|
12
|
+
// Format is a parquet format.
type Format struct {
	// TempDir is the directory in which non-seekable inputs are spooled to a
	// temporary file before decoding.
	TempDir string
	// Columns lists the column names to include; when empty, every column of
	// the file schema is read.
	Columns []string
	// BatchSize is the batch size handed to the column readers; values below
	// 1 select the default of 1,000.
	BatchSize int
}
|
18
|
+
|
19
|
+
// NewDecoder implements Format.
|
20
|
+
func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
|
21
|
+
if rs, ok := r.(io.ReadSeeker); ok {
|
22
|
+
return newDecoder(rs, f.Columns, f.BatchSize)
|
23
|
+
}
|
24
|
+
|
25
|
+
tmp, err := copyToTempFile(f.TempDir, r)
|
26
|
+
if err != nil {
|
27
|
+
return nil, err
|
28
|
+
}
|
29
|
+
|
30
|
+
dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
|
31
|
+
if err != nil {
|
32
|
+
_ = tmp.Close()
|
33
|
+
return nil, err
|
34
|
+
}
|
35
|
+
dec.closers = append(dec.closers, tmp)
|
36
|
+
return dec, nil
|
37
|
+
}
|
38
|
+
|
39
|
+
// NewEncoder implements Format.
|
40
|
+
func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
|
41
|
+
return nil, fmt.Errorf("not implemented")
|
42
|
+
}
|
43
|
+
|
44
|
+
// --------------------------------------------------------------------
|
45
|
+
|
46
|
+
// tempFile is an *os.File that is deleted from disk when it is closed.
type tempFile struct{ *os.File }

// copyToTempFile copies everything from r into a newly created temporary file
// in dir and returns a handle positioned at the start of that file, ready for
// reading and seeking.
//
// If copying or rewinding fails, the temporary file is closed and removed and
// the error is returned with a nil handle.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	w, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	tmp := &tempFile{File: w}

	if _, err := io.Copy(w, r); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		_ = tmp.Close()
		return nil, err
	}

	// The temp file is already open for reading and writing, so rewind the
	// existing handle instead of closing it and re-opening the file by name.
	if _, err := w.Seek(0, io.SeekStart); err != nil {
		_ = tmp.Close()
		return nil, err
	}

	return tmp, nil
}

// Close closes the underlying file and then removes it from disk. Removal is
// attempted even when closing fails; a removal error takes precedence over a
// close error.
func (f tempFile) Close() error {
	err := f.File.Close()
	if e := os.Remove(f.Name()); e != nil {
		err = e
	}
	return err
}
|