feedx 0.11.0 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +3 -0
- data/.github/workflows/test.yml +60 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +15 -4
- data/Gemfile +0 -2
- data/Gemfile.lock +80 -50
- data/Makefile +6 -6
- data/README.md +1 -1
- data/compression.go +18 -0
- data/compression_test.go +14 -2
- data/consumer_test.go +2 -2
- data/ext/parquet/decoder.go +170 -0
- data/ext/parquet/decoder_test.go +88 -0
- data/ext/parquet/go.mod +10 -0
- data/ext/parquet/go.sum +152 -0
- data/ext/parquet/parquet.go +78 -0
- data/ext/parquet/parquet_test.go +28 -0
- data/ext/parquet/reader.go +89 -0
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +51 -0
- data/feedx.gemspec +5 -6
- data/feedx_test.go +2 -2
- data/format.go +45 -15
- data/format_test.go +4 -2
- data/go.mod +10 -5
- data/go.sum +90 -25
- data/internal/testdata/testdata.pb.go +176 -77
- data/lib/feedx/cache/abstract.rb +2 -2
- data/lib/feedx/cache/memory.rb +1 -0
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +2 -2
- data/lib/feedx/compression/none.rb +2 -2
- data/lib/feedx/consumer.rb +15 -9
- data/lib/feedx/format.rb +4 -1
- data/lib/feedx/producer.rb +27 -22
- data/lib/feedx/stream.rb +30 -13
- data/producer_test.go +2 -2
- data/reader_test.go +2 -2
- data/spec/feedx/cache/memory_spec.rb +2 -2
- data/spec/feedx/cache/value_spec.rb +1 -1
- data/spec/feedx/compression/gzip_spec.rb +1 -1
- data/spec/feedx/compression/none_spec.rb +1 -1
- data/spec/feedx/compression_spec.rb +2 -2
- data/spec/feedx/consumer_spec.rb +5 -4
- data/spec/feedx/format/abstract_spec.rb +2 -1
- data/spec/feedx/format/json_spec.rb +6 -6
- data/spec/feedx/format/parquet_spec.rb +1 -1
- data/spec/feedx/format/protobuf_spec.rb +1 -1
- data/spec/feedx/format_spec.rb +2 -2
- data/spec/feedx/producer_spec.rb +15 -8
- data/spec/feedx/stream_spec.rb +36 -18
- data/writer_test.go +2 -2
- metadata +24 -23
- data/.travis.yml +0 -24
@@ -0,0 +1,88 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"bytes"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
"github.com/bsm/feedx"
|
11
|
+
"github.com/bsm/feedx/ext/parquet"
|
12
|
+
. "github.com/bsm/ginkgo"
|
13
|
+
. "github.com/bsm/gomega"
|
14
|
+
)
|
15
|
+
|
16
|
+
var _ = Describe("Decoder", func() {
|
17
|
+
var subject feedx.FormatDecoder
|
18
|
+
var fixture *os.File
|
19
|
+
|
20
|
+
f32ptr := func(f float32) *float32 { return &f }
|
21
|
+
|
22
|
+
BeforeEach(func() {
|
23
|
+
var err error
|
24
|
+
fixture, err = os.Open("testdata/alltypes_plain.parquet")
|
25
|
+
Expect(err).NotTo(HaveOccurred())
|
26
|
+
|
27
|
+
format := &parquet.Format{BatchSize: 3}
|
28
|
+
subject, err = format.NewDecoder(fixture)
|
29
|
+
Expect(err).NotTo(HaveOccurred())
|
30
|
+
})
|
31
|
+
|
32
|
+
AfterEach(func() {
|
33
|
+
Expect(subject.Close()).To(Succeed())
|
34
|
+
Expect(fixture.Close()).To(Succeed())
|
35
|
+
})
|
36
|
+
|
37
|
+
It("should decode", func() {
|
38
|
+
v1 := new(mockStruct)
|
39
|
+
Expect(subject.Decode(v1)).To(Succeed())
|
40
|
+
Expect(v1).To(Equal(&mockStruct{
|
41
|
+
ID: 4,
|
42
|
+
Bool: true,
|
43
|
+
Float: f32ptr(0),
|
44
|
+
DateString: "03/01/09", ByteString: []byte("0"),
|
45
|
+
Timestamp: time.Unix(1235865600, 0),
|
46
|
+
}))
|
47
|
+
|
48
|
+
v2 := new(mockStruct)
|
49
|
+
Expect(subject.Decode(v2)).To(Succeed())
|
50
|
+
Expect(v2).To(Equal(&mockStruct{
|
51
|
+
ID: 5,
|
52
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
53
|
+
Float: f32ptr(1.1), Double: 10.1,
|
54
|
+
DateString: "03/01/09", ByteString: []byte("1"),
|
55
|
+
Timestamp: time.Unix(1235865660, 0),
|
56
|
+
}))
|
57
|
+
|
58
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
|
59
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
|
60
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
|
61
|
+
|
62
|
+
v6 := new(mockStruct)
|
63
|
+
Expect(subject.Decode(v6)).To(Succeed())
|
64
|
+
Expect(v6).To(Equal(&mockStruct{
|
65
|
+
ID: 3,
|
66
|
+
Bool: false,
|
67
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
68
|
+
Float: f32ptr(1.1), Double: 10.1,
|
69
|
+
DateString: "02/01/09", ByteString: []byte("1"),
|
70
|
+
Timestamp: time.Unix(1233446460, 0),
|
71
|
+
}))
|
72
|
+
|
73
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
|
74
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
|
75
|
+
|
76
|
+
v9 := new(mockStruct)
|
77
|
+
Expect(subject.Decode(v9)).To(MatchError(io.EOF))
|
78
|
+
})
|
79
|
+
|
80
|
+
It("should open from non-file readers", func() {
|
81
|
+
bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
|
82
|
+
Expect(err).NotTo(HaveOccurred())
|
83
|
+
|
84
|
+
dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
|
85
|
+
Expect(err).NotTo(HaveOccurred())
|
86
|
+
Expect(dec.Close()).To(Succeed())
|
87
|
+
})
|
88
|
+
})
|
data/ext/parquet/go.mod
ADDED
data/ext/parquet/go.sum
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
2
|
+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
3
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0 h1:TQtVPlDnAYwcrVNB2JiGuMc++H5qzWZd9PhkNo5WyHI=
|
4
|
+
github.com/bmatcuk/doublestar/v3 v3.0.0/go.mod h1:6PcTVMw80pCY1RVuoqu3V++99uQB3vsSYKPTd8AWA0k=
|
5
|
+
github.com/bsm/bfs v0.11.3 h1:BTFCftgmuVZwwu6vyjhyKr/Pg1E+cZ5tLodj3wKxr94=
|
6
|
+
github.com/bsm/bfs v0.11.3/go.mod h1:sUhBrbc9g0XThRRrT9hiinMhhKbkKIdhLkFljk4fuzM=
|
7
|
+
github.com/bsm/ginkgo v1.16.0/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
8
|
+
github.com/bsm/ginkgo v1.16.1 h1:jp1v1dbmbGZDWmnGXDTN+XK3U1fTTNja9xYa7VBI0l0=
|
9
|
+
github.com/bsm/ginkgo v1.16.1/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
|
10
|
+
github.com/bsm/gomega v1.11.0 h1:wg9DVGPETNZLIbMsseneMV1a7uo/x+wsCyNXdEcifDI=
|
11
|
+
github.com/bsm/gomega v1.11.0/go.mod h1:JifAceMQ4crZIWYUKrlGcmbN3bqHogVTADMD2ATsbwk=
|
12
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
|
13
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
|
14
|
+
github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
|
15
|
+
github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
|
16
|
+
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
17
|
+
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
18
|
+
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
19
|
+
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
20
|
+
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
21
|
+
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
|
22
|
+
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
23
|
+
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
24
|
+
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
25
|
+
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
26
|
+
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
27
|
+
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
28
|
+
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
29
|
+
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
30
|
+
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
31
|
+
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
32
|
+
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
33
|
+
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
34
|
+
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
35
|
+
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
36
|
+
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
|
37
|
+
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
38
|
+
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
39
|
+
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
40
|
+
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
41
|
+
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
42
|
+
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
43
|
+
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
44
|
+
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
45
|
+
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
46
|
+
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
47
|
+
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
48
|
+
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
49
|
+
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
50
|
+
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
51
|
+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
52
|
+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
53
|
+
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
54
|
+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
55
|
+
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
|
56
|
+
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
57
|
+
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
58
|
+
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
59
|
+
github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
|
60
|
+
github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
|
61
|
+
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
62
|
+
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
63
|
+
github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
|
64
|
+
github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
65
|
+
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
66
|
+
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
67
|
+
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
68
|
+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
69
|
+
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
70
|
+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
71
|
+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
72
|
+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
73
|
+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
74
|
+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
75
|
+
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
76
|
+
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
77
|
+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
78
|
+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
79
|
+
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
80
|
+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
81
|
+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
82
|
+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
83
|
+
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
84
|
+
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
85
|
+
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
86
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
|
87
|
+
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
88
|
+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
89
|
+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
90
|
+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
91
|
+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
92
|
+
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
93
|
+
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
94
|
+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
95
|
+
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
96
|
+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
97
|
+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
98
|
+
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
99
|
+
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
100
|
+
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
101
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
102
|
+
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
103
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
|
104
|
+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
105
|
+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
106
|
+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
107
|
+
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
108
|
+
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
109
|
+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
110
|
+
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
111
|
+
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
112
|
+
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
113
|
+
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
114
|
+
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
115
|
+
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
116
|
+
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
117
|
+
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
118
|
+
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
119
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
120
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
121
|
+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
122
|
+
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
123
|
+
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
124
|
+
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
125
|
+
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
126
|
+
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
127
|
+
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
128
|
+
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
129
|
+
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
130
|
+
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
131
|
+
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
132
|
+
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
133
|
+
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
134
|
+
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
135
|
+
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
136
|
+
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
137
|
+
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
138
|
+
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
139
|
+
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
140
|
+
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
|
141
|
+
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
142
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
143
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
144
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
145
|
+
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
146
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
147
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
148
|
+
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
149
|
+
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
150
|
+
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
151
|
+
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
152
|
+
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"fmt"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
|
9
|
+
"github.com/bsm/feedx"
|
10
|
+
)
|
11
|
+
|
12
|
+
// Format is a parquet format.
type Format struct {
	// TempDir is the directory handed to ioutil.TempFile when a non-seekable
	// reader has to be copied to disk before decoding.
	TempDir string

	// Columns lists the column names to include.
	Columns []string

	// BatchSize is the batch size, default: 1,000.
	BatchSize int
}
|
18
|
+
|
19
|
+
// NewDecoder implements Format.
|
20
|
+
func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
|
21
|
+
if rs, ok := r.(io.ReadSeeker); ok {
|
22
|
+
return newDecoder(rs, f.Columns, f.BatchSize)
|
23
|
+
}
|
24
|
+
|
25
|
+
tmp, err := copyToTempFile(f.TempDir, r)
|
26
|
+
if err != nil {
|
27
|
+
return nil, err
|
28
|
+
}
|
29
|
+
|
30
|
+
dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
|
31
|
+
if err != nil {
|
32
|
+
_ = tmp.Close()
|
33
|
+
return nil, err
|
34
|
+
}
|
35
|
+
dec.closers = append(dec.closers, tmp)
|
36
|
+
return dec, nil
|
37
|
+
}
|
38
|
+
|
39
|
+
// NewEncoder implements Format.
|
40
|
+
func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
|
41
|
+
return nil, fmt.Errorf("not implemented")
|
42
|
+
}
|
43
|
+
|
44
|
+
// --------------------------------------------------------------------
|
45
|
+
|
46
|
+
// tempFile is an *os.File that is deleted from disk when it is closed.
type tempFile struct{ *os.File }

// copyToTempFile writes everything read from r into a new temporary file
// created in dir and returns that file re-opened for reading.
//
// If any step fails, the temporary file is removed again (best effort) and
// the error of the failing step is returned with a nil file.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	spool, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	path := spool.Name()

	// discard deletes the spool file and passes the original failure on.
	discard := func(cause error) (*tempFile, error) {
		_ = os.Remove(path) // best effort, cause is what matters
		return nil, cause
	}

	if _, copyErr := io.Copy(spool, r); copyErr != nil {
		_ = spool.Close() // best effort, the copy error takes precedence
		return discard(copyErr)
	}
	if closeErr := spool.Close(); closeErr != nil {
		return discard(closeErr)
	}

	// Re-open the file so that the returned handle starts at offset zero.
	reopened, err := os.Open(path)
	if err != nil {
		return discard(err)
	}
	return &tempFile{File: reopened}, nil
}

// Close closes the underlying file and removes it from disk. The removal is
// attempted even when closing fails; a removal error takes precedence over a
// close error.
func (f tempFile) Close() error {
	closeErr := f.File.Close()
	if removeErr := os.Remove(f.Name()); removeErr != nil {
		return removeErr
	}
	return closeErr
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"testing"
|
5
|
+
"time"
|
6
|
+
|
7
|
+
. "github.com/bsm/ginkgo"
|
8
|
+
. "github.com/bsm/gomega"
|
9
|
+
)
|
10
|
+
|
11
|
+
// mockStruct is the record type the decoder specs decode into. Each field is
// bound to a fixture column through its `parquet` struct tag.
type mockStruct struct {
	// integer and boolean columns
	ID        int    `parquet:"id"`
	Bool      bool   `parquet:"bool_col"`
	TinyInt   int8   `parquet:"tinyint_col"`
	SmallUint uint16 `parquet:"smallint_col"`
	StdInt    int    `parquet:"int_col"`
	BigInt    int64  `parquet:"bigint_col"`

	// floating point columns; Float is a pointer, Double a plain value
	Float  *float32 `parquet:"float_col"`
	Double float64  `parquet:"double_col"`

	// textual, binary and time columns
	DateString string    `parquet:"date_string_col"`
	ByteString []byte    `parquet:"string_col"`
	Timestamp  time.Time `parquet:"timestamp_col"`
}
|
24
|
+
|
25
|
+
func TestSuite(t *testing.T) {
|
26
|
+
RegisterFailHandler(Fail)
|
27
|
+
RunSpecs(t, "feedx/ext/parquet")
|
28
|
+
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"io"
|
5
|
+
|
6
|
+
kpq "github.com/bsm/parquet-go/parquet"
|
7
|
+
)
|
8
|
+
|
9
|
+
type columnReader struct {
|
10
|
+
file *kpq.File
|
11
|
+
col kpq.Column
|
12
|
+
rowGroup int
|
13
|
+
|
14
|
+
chunk *kpq.ColumnChunkReader
|
15
|
+
values []interface{}
|
16
|
+
dLevels []uint16
|
17
|
+
rLevels []uint16
|
18
|
+
|
19
|
+
n, i, vi int // chunk stats
|
20
|
+
}
|
21
|
+
|
22
|
+
func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
|
23
|
+
return &columnReader{
|
24
|
+
file: file,
|
25
|
+
col: col,
|
26
|
+
values: make([]interface{}, batchSize),
|
27
|
+
dLevels: make([]uint16, batchSize),
|
28
|
+
rLevels: make([]uint16, batchSize),
|
29
|
+
}
|
30
|
+
}
|
31
|
+
|
32
|
+
func (c *columnReader) Name() string {
|
33
|
+
return c.col.String()
|
34
|
+
}
|
35
|
+
|
36
|
+
func (c *columnReader) Next() (interface{}, error) {
|
37
|
+
if err := c.ensureChunk(); err != nil {
|
38
|
+
return nil, err
|
39
|
+
}
|
40
|
+
|
41
|
+
if err := c.ensureValues(); err == kpq.EndOfChunk {
|
42
|
+
c.chunk = nil
|
43
|
+
return c.Next()
|
44
|
+
} else if err != nil {
|
45
|
+
return nil, err
|
46
|
+
}
|
47
|
+
|
48
|
+
dLevel := c.dLevels[c.i]
|
49
|
+
c.i++
|
50
|
+
|
51
|
+
if notNull := dLevel == c.col.MaxD(); notNull {
|
52
|
+
val := c.values[c.vi]
|
53
|
+
c.vi++
|
54
|
+
return val, nil
|
55
|
+
}
|
56
|
+
|
57
|
+
return nil, nil
|
58
|
+
}
|
59
|
+
|
60
|
+
func (c *columnReader) ensureChunk() error {
|
61
|
+
if c.chunk != nil {
|
62
|
+
return nil
|
63
|
+
}
|
64
|
+
if c.rowGroup >= len(c.file.MetaData.RowGroups) {
|
65
|
+
return io.EOF
|
66
|
+
}
|
67
|
+
|
68
|
+
rd, err := c.file.NewReader(c.col, c.rowGroup)
|
69
|
+
if err != nil {
|
70
|
+
return err
|
71
|
+
}
|
72
|
+
c.chunk = rd
|
73
|
+
c.rowGroup++
|
74
|
+
return nil
|
75
|
+
}
|
76
|
+
|
77
|
+
func (c *columnReader) ensureValues() error {
|
78
|
+
if c.n != 0 && c.i < c.n {
|
79
|
+
return nil
|
80
|
+
}
|
81
|
+
|
82
|
+
n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
|
83
|
+
if err != nil {
|
84
|
+
return err
|
85
|
+
}
|
86
|
+
|
87
|
+
c.n, c.i, c.vi = n, 0, 0
|
88
|
+
return nil
|
89
|
+
}
|