feedx 0.12.0 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +3 -0
- data/.github/workflows/test.yml +60 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +14 -5
- data/Gemfile +0 -2
- data/Gemfile.lock +60 -49
- data/Makefile +6 -6
- data/README.md +1 -1
- data/compression.go +18 -0
- data/compression_test.go +17 -5
- data/consumer.go +12 -3
- data/consumer_test.go +50 -19
- data/ext/parquet/decoder.go +170 -0
- data/ext/parquet/decoder_test.go +88 -0
- data/ext/parquet/go.mod +10 -0
- data/ext/parquet/go.sum +154 -0
- data/ext/parquet/parquet.go +78 -0
- data/ext/parquet/parquet_test.go +28 -0
- data/ext/parquet/reader.go +89 -0
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +51 -0
- data/feedx.gemspec +5 -6
- data/feedx_ext_test.go +6 -0
- data/feedx_test.go +6 -6
- data/format.go +45 -15
- data/format_test.go +7 -5
- data/go.mod +10 -5
- data/go.sum +90 -25
- data/internal/testdata/testdata.pb.go +176 -77
- data/lib/feedx/cache/memory.rb +1 -0
- data/lib/feedx/consumer.rb +9 -6
- data/lib/feedx/format.rb +1 -1
- data/lib/feedx/producer.rb +20 -18
- data/lib/feedx/stream.rb +24 -8
- data/producer_test.go +4 -4
- data/reader_test.go +6 -5
- data/spec/feedx/cache/memory_spec.rb +2 -2
- data/spec/feedx/cache/value_spec.rb +1 -1
- data/spec/feedx/compression/gzip_spec.rb +1 -1
- data/spec/feedx/compression/none_spec.rb +1 -1
- data/spec/feedx/compression_spec.rb +2 -2
- data/spec/feedx/consumer_spec.rb +5 -4
- data/spec/feedx/format/abstract_spec.rb +2 -1
- data/spec/feedx/format/json_spec.rb +6 -6
- data/spec/feedx/format/parquet_spec.rb +1 -1
- data/spec/feedx/format/protobuf_spec.rb +1 -1
- data/spec/feedx/format_spec.rb +2 -2
- data/spec/feedx/producer_spec.rb +10 -9
- data/spec/feedx/stream_spec.rb +36 -18
- data/writer.go +1 -4
- data/writer_test.go +8 -8
- metadata +25 -23
- data/.travis.yml +0 -24
@@ -0,0 +1,170 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"encoding/binary"
|
5
|
+
"fmt"
|
6
|
+
"io"
|
7
|
+
"reflect"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
kpq "github.com/bsm/parquet-go/parquet"
|
11
|
+
)
|
12
|
+
|
13
|
+
type decoder struct {
|
14
|
+
cols []*columnReader
|
15
|
+
closers []io.Closer
|
16
|
+
}
|
17
|
+
|
18
|
+
func newDecoder(rs io.ReadSeeker, names []string, batchSize int) (*decoder, error) {
|
19
|
+
file, err := kpq.FileFromReader(rs)
|
20
|
+
if err != nil {
|
21
|
+
return nil, err
|
22
|
+
}
|
23
|
+
|
24
|
+
// normalise column names
|
25
|
+
if len(names) == 0 {
|
26
|
+
for _, c := range file.Schema.Columns() {
|
27
|
+
names = append(names, c.String())
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
// normalise batch size
|
32
|
+
if batchSize < 1 {
|
33
|
+
batchSize = 1000
|
34
|
+
}
|
35
|
+
|
36
|
+
// initialise column buffers
|
37
|
+
cols := make([]*columnReader, 0, len(names))
|
38
|
+
for _, name := range names {
|
39
|
+
col, ok := file.Schema.ColumnByName(name)
|
40
|
+
if !ok {
|
41
|
+
_ = file.Close()
|
42
|
+
return nil, fmt.Errorf("column %q does not exist", name)
|
43
|
+
}
|
44
|
+
cols = append(cols, newColumnReader(file, col, batchSize))
|
45
|
+
}
|
46
|
+
|
47
|
+
return &decoder{cols: cols, closers: []io.Closer{file}}, nil
|
48
|
+
}
|
49
|
+
|
50
|
+
func (w *decoder) Decode(v interface{}) error {
|
51
|
+
rv := reflect.ValueOf(v)
|
52
|
+
rt := rv.Type()
|
53
|
+
if rt.Kind() != reflect.Ptr {
|
54
|
+
return fmt.Errorf("cannot decode non-pointer %s type", rt.String())
|
55
|
+
}
|
56
|
+
|
57
|
+
// field index by name
|
58
|
+
fidx := cachedTypeFields(rt.Elem())
|
59
|
+
elem := rv.Elem()
|
60
|
+
|
61
|
+
for _, r := range w.cols {
|
62
|
+
// next column value
|
63
|
+
val, err := r.Next()
|
64
|
+
if err != nil {
|
65
|
+
return err
|
66
|
+
}
|
67
|
+
|
68
|
+
// skip if value is NULL
|
69
|
+
if val == nil {
|
70
|
+
continue
|
71
|
+
}
|
72
|
+
|
73
|
+
// set field if exists
|
74
|
+
if fi, ok := fidx[r.Name()]; ok {
|
75
|
+
fv := elem.Field(fi)
|
76
|
+
if ok := setValue(fv, val); !ok {
|
77
|
+
return fmt.Errorf("cannot assign value of type %T to %s", val, fv.Type())
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
return nil
|
83
|
+
}
|
84
|
+
|
85
|
+
func (w *decoder) Close() (err error) {
|
86
|
+
for _, c := range w.closers {
|
87
|
+
if e := c.Close(); e != nil {
|
88
|
+
err = e
|
89
|
+
}
|
90
|
+
}
|
91
|
+
return
|
92
|
+
}
|
93
|
+
|
94
|
+
// --------------------------------------------------------------------
|
95
|
+
|
96
|
+
func setValue(rv reflect.Value, v interface{}) bool {
|
97
|
+
if rv.Kind() == reflect.Ptr {
|
98
|
+
if rv.IsNil() {
|
99
|
+
if ev := reflect.New(rv.Type().Elem()); setValue(ev, v) {
|
100
|
+
rv.Set(ev)
|
101
|
+
return true
|
102
|
+
}
|
103
|
+
return false
|
104
|
+
}
|
105
|
+
return setValue(rv.Elem(), v)
|
106
|
+
}
|
107
|
+
|
108
|
+
switch vv := v.(type) {
|
109
|
+
case bool:
|
110
|
+
switch rv.Kind() {
|
111
|
+
case reflect.Bool:
|
112
|
+
rv.SetBool(vv)
|
113
|
+
return true
|
114
|
+
}
|
115
|
+
case []byte:
|
116
|
+
switch rv.Kind() {
|
117
|
+
case reflect.String:
|
118
|
+
rv.SetString(string(vv))
|
119
|
+
return true
|
120
|
+
case reflect.Slice:
|
121
|
+
if rv.Type() == byteSliceType {
|
122
|
+
rv.SetBytes(vv)
|
123
|
+
return true
|
124
|
+
}
|
125
|
+
}
|
126
|
+
case int, int8, int16, int32, int64:
|
127
|
+
switch rv.Kind() {
|
128
|
+
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
129
|
+
rv.SetInt(reflect.ValueOf(v).Int())
|
130
|
+
return true
|
131
|
+
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
132
|
+
rv.SetUint(uint64(reflect.ValueOf(v).Int()))
|
133
|
+
return true
|
134
|
+
}
|
135
|
+
case uint, uint8, uint16, uint32, uint64:
|
136
|
+
switch rv.Kind() {
|
137
|
+
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
138
|
+
rv.SetInt(int64(reflect.ValueOf(v).Uint()))
|
139
|
+
return true
|
140
|
+
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
141
|
+
rv.SetUint(reflect.ValueOf(v).Uint())
|
142
|
+
return true
|
143
|
+
}
|
144
|
+
case float32, float64:
|
145
|
+
switch rv.Kind() {
|
146
|
+
case reflect.Float32, reflect.Float64:
|
147
|
+
rv.SetFloat(reflect.ValueOf(v).Float())
|
148
|
+
return true
|
149
|
+
}
|
150
|
+
case kpq.Int96:
|
151
|
+
if rt := rv.Type(); rt == timeType {
|
152
|
+
ns := int64(binary.LittleEndian.Uint64(vv[:8]))
|
153
|
+
jd := int64(binary.LittleEndian.Uint32(vv[8:]))
|
154
|
+
ts := time.Unix((jd-2440588)*86400, ns)
|
155
|
+
rv.Set(reflect.ValueOf(ts))
|
156
|
+
return true
|
157
|
+
} else if rt == int96Type {
|
158
|
+
rv.Set(reflect.ValueOf(v))
|
159
|
+
return true
|
160
|
+
}
|
161
|
+
}
|
162
|
+
|
163
|
+
return false
|
164
|
+
}
|
165
|
+
|
166
|
+
var (
|
167
|
+
byteSliceType = reflect.TypeOf(([]byte)(nil))
|
168
|
+
int96Type = reflect.TypeOf(kpq.Int96{})
|
169
|
+
timeType = reflect.TypeOf(time.Time{})
|
170
|
+
)
|
@@ -0,0 +1,88 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"bytes"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
"github.com/bsm/feedx"
|
11
|
+
"github.com/bsm/feedx/ext/parquet"
|
12
|
+
. "github.com/bsm/ginkgo"
|
13
|
+
. "github.com/bsm/gomega"
|
14
|
+
)
|
15
|
+
|
16
|
+
var _ = Describe("Decoder", func() {
|
17
|
+
var subject feedx.FormatDecoder
|
18
|
+
var fixture *os.File
|
19
|
+
|
20
|
+
f32ptr := func(f float32) *float32 { return &f }
|
21
|
+
|
22
|
+
BeforeEach(func() {
|
23
|
+
var err error
|
24
|
+
fixture, err = os.Open("testdata/alltypes_plain.parquet")
|
25
|
+
Expect(err).NotTo(HaveOccurred())
|
26
|
+
|
27
|
+
format := &parquet.Format{BatchSize: 3}
|
28
|
+
subject, err = format.NewDecoder(fixture)
|
29
|
+
Expect(err).NotTo(HaveOccurred())
|
30
|
+
})
|
31
|
+
|
32
|
+
AfterEach(func() {
|
33
|
+
Expect(subject.Close()).To(Succeed())
|
34
|
+
Expect(fixture.Close()).To(Succeed())
|
35
|
+
})
|
36
|
+
|
37
|
+
It("decodes", func() {
|
38
|
+
v1 := new(mockStruct)
|
39
|
+
Expect(subject.Decode(v1)).To(Succeed())
|
40
|
+
Expect(v1).To(Equal(&mockStruct{
|
41
|
+
ID: 4,
|
42
|
+
Bool: true,
|
43
|
+
Float: f32ptr(0),
|
44
|
+
DateString: "03/01/09", ByteString: []byte("0"),
|
45
|
+
Timestamp: time.Unix(1235865600, 0),
|
46
|
+
}))
|
47
|
+
|
48
|
+
v2 := new(mockStruct)
|
49
|
+
Expect(subject.Decode(v2)).To(Succeed())
|
50
|
+
Expect(v2).To(Equal(&mockStruct{
|
51
|
+
ID: 5,
|
52
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
53
|
+
Float: f32ptr(1.1), Double: 10.1,
|
54
|
+
DateString: "03/01/09", ByteString: []byte("1"),
|
55
|
+
Timestamp: time.Unix(1235865660, 0),
|
56
|
+
}))
|
57
|
+
|
58
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
|
59
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
|
60
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
|
61
|
+
|
62
|
+
v6 := new(mockStruct)
|
63
|
+
Expect(subject.Decode(v6)).To(Succeed())
|
64
|
+
Expect(v6).To(Equal(&mockStruct{
|
65
|
+
ID: 3,
|
66
|
+
Bool: false,
|
67
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
68
|
+
Float: f32ptr(1.1), Double: 10.1,
|
69
|
+
DateString: "02/01/09", ByteString: []byte("1"),
|
70
|
+
Timestamp: time.Unix(1233446460, 0),
|
71
|
+
}))
|
72
|
+
|
73
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
|
74
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
|
75
|
+
|
76
|
+
v9 := new(mockStruct)
|
77
|
+
Expect(subject.Decode(v9)).To(MatchError(io.EOF))
|
78
|
+
})
|
79
|
+
|
80
|
+
It("opens from non-file readers", func() {
|
81
|
+
bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
|
82
|
+
Expect(err).NotTo(HaveOccurred())
|
83
|
+
|
84
|
+
dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
|
85
|
+
Expect(err).NotTo(HaveOccurred())
|
86
|
+
Expect(dec.Close()).To(Succeed())
|
87
|
+
})
|
88
|
+
})
|
data/ext/parquet/go.mod
ADDED
data/ext/parquet/go.sum
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
2
|
+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
3
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0 h1:TQtVPlDnAYwcrVNB2JiGuMc++H5qzWZd9PhkNo5WyHI=
|
4
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0/go.mod h1:6PcTVMw80pCY1RVuoqu3V++99uQB3vsSYKPTd8AWA0k=
|
5
|
+
github.com/bsm/bfs v0.11.3 h1:BTFCftgmuVZwwu6vyjhyKr/Pg1E+cZ5tLodj3wKxr94=
|
6
|
+
github.com/bsm/bfs v0.11.3/go.mod h1:sUhBrbc9g0XThRRrT9hiinMhhKbkKIdhLkFljk4fuzM=
|
7
|
+
github.com/bsm/feedx v0.12.5 h1:N751MFTRKRgoP5eG1S30EoUuHsI7aAxJ0aQZg+7xf/Y=
|
8
|
+
github.com/bsm/feedx v0.12.5/go.mod h1:l5YNhFomuWy9du+8+hznXMH8Hug2qSMik7b5Vipcy58=
|
9
|
+
github.com/bsm/ginkgo v1.16.0/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
10
|
+
github.com/bsm/ginkgo v1.16.1 h1:jp1v1dbmbGZDWmnGXDTN+XK3U1fTTNja9xYa7VBI0l0=
|
11
|
+
github.com/bsm/ginkgo v1.16.1/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
12
|
+
github.com/bsm/gomega v1.11.0 h1:wg9DVGPETNZLIbMsseneMV1a7uo/x+wsCyNXdEcifDI=
|
13
|
+
github.com/bsm/gomega v1.11.0/go.mod h1:JifAceMQ4crZIWYUKrlGcmbN3bqHogVTADMD2ATsbwk=
|
14
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
|
15
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
|
16
|
+
github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
|
17
|
+
github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
|
18
|
+
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
19
|
+
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
20
|
+
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
21
|
+
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
22
|
+
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
23
|
+
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
|
24
|
+
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
25
|
+
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
26
|
+
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
27
|
+
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
28
|
+
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
29
|
+
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
30
|
+
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
31
|
+
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
32
|
+
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
33
|
+
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
34
|
+
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
35
|
+
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
36
|
+
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
37
|
+
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
38
|
+
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
|
39
|
+
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
40
|
+
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
41
|
+
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
42
|
+
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
43
|
+
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
44
|
+
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
45
|
+
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
46
|
+
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
47
|
+
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
48
|
+
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
49
|
+
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
50
|
+
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
51
|
+
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
52
|
+
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
53
|
+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
54
|
+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
55
|
+
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
56
|
+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
57
|
+
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
|
58
|
+
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
59
|
+
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
60
|
+
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
61
|
+
github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
|
62
|
+
github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
|
63
|
+
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
64
|
+
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
65
|
+
github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
|
66
|
+
github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
67
|
+
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
68
|
+
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
69
|
+
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
70
|
+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
71
|
+
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
72
|
+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
73
|
+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
74
|
+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
75
|
+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
76
|
+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
77
|
+
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
78
|
+
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
79
|
+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
80
|
+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
81
|
+
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
82
|
+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
83
|
+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
84
|
+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
85
|
+
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
86
|
+
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
87
|
+
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
88
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
|
89
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
90
|
+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
91
|
+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
92
|
+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
93
|
+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
94
|
+
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
95
|
+
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
96
|
+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
97
|
+
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
98
|
+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
99
|
+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
100
|
+
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
101
|
+
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
102
|
+
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
103
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
104
|
+
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
105
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
|
106
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
107
|
+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
108
|
+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
109
|
+
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
110
|
+
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
111
|
+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
112
|
+
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
113
|
+
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
114
|
+
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
115
|
+
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
116
|
+
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
117
|
+
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
118
|
+
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
119
|
+
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
120
|
+
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
121
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
122
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
123
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
124
|
+
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
125
|
+
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
126
|
+
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
127
|
+
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
128
|
+
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
129
|
+
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
130
|
+
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
131
|
+
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
132
|
+
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
133
|
+
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
134
|
+
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
135
|
+
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
136
|
+
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
137
|
+
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
138
|
+
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
139
|
+
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
140
|
+
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
141
|
+
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
142
|
+
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
|
143
|
+
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
144
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
145
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
146
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
147
|
+
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
148
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
149
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
150
|
+
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
151
|
+
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
152
|
+
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
153
|
+
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
154
|
+
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"fmt"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
|
9
|
+
"github.com/bsm/feedx"
|
10
|
+
)
|
11
|
+
|
12
|
+
// Format is a parquet format.
|
13
|
+
type Format struct {
|
14
|
+
TempDir string
|
15
|
+
Columns []string // column names to include
|
16
|
+
BatchSize int // batch size, default: 1,000
|
17
|
+
}
|
18
|
+
|
19
|
+
// NewDecoder implements Format.
|
20
|
+
func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
|
21
|
+
if rs, ok := r.(io.ReadSeeker); ok {
|
22
|
+
return newDecoder(rs, f.Columns, f.BatchSize)
|
23
|
+
}
|
24
|
+
|
25
|
+
tmp, err := copyToTempFile(f.TempDir, r)
|
26
|
+
if err != nil {
|
27
|
+
return nil, err
|
28
|
+
}
|
29
|
+
|
30
|
+
dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
|
31
|
+
if err != nil {
|
32
|
+
_ = tmp.Close()
|
33
|
+
return nil, err
|
34
|
+
}
|
35
|
+
dec.closers = append(dec.closers, tmp)
|
36
|
+
return dec, nil
|
37
|
+
}
|
38
|
+
|
39
|
+
// NewEncoder implements Format.
|
40
|
+
func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
|
41
|
+
return nil, fmt.Errorf("not implemented")
|
42
|
+
}
|
43
|
+
|
44
|
+
// --------------------------------------------------------------------
|
45
|
+
|
46
|
+
type tempFile struct{ *os.File }
|
47
|
+
|
48
|
+
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
|
49
|
+
w, err := ioutil.TempFile(dir, "feedx-ext-parquet")
|
50
|
+
if err != nil {
|
51
|
+
return nil, err
|
52
|
+
}
|
53
|
+
if _, err := io.Copy(w, r); err != nil {
|
54
|
+
_ = w.Close()
|
55
|
+
_ = os.Remove(w.Name())
|
56
|
+
return nil, err
|
57
|
+
}
|
58
|
+
if err := w.Close(); err != nil {
|
59
|
+
_ = os.Remove(w.Name())
|
60
|
+
return nil, err
|
61
|
+
}
|
62
|
+
|
63
|
+
f, err := os.Open(w.Name())
|
64
|
+
if err != nil {
|
65
|
+
_ = os.Remove(w.Name())
|
66
|
+
return nil, err
|
67
|
+
}
|
68
|
+
|
69
|
+
return &tempFile{File: f}, nil
|
70
|
+
}
|
71
|
+
|
72
|
+
func (f tempFile) Close() error {
|
73
|
+
err := f.File.Close()
|
74
|
+
if e := os.Remove(f.Name()); e != nil {
|
75
|
+
err = e
|
76
|
+
}
|
77
|
+
return err
|
78
|
+
}
|