feedx 0.11.0 → 0.12.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.github/workflows/test.yml +60 -0
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +15 -4
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +80 -50
  8. data/Makefile +6 -6
  9. data/README.md +1 -1
  10. data/compression.go +18 -0
  11. data/compression_test.go +14 -2
  12. data/consumer_test.go +2 -2
  13. data/ext/parquet/decoder.go +170 -0
  14. data/ext/parquet/decoder_test.go +88 -0
  15. data/ext/parquet/go.mod +10 -0
  16. data/ext/parquet/go.sum +152 -0
  17. data/ext/parquet/parquet.go +78 -0
  18. data/ext/parquet/parquet_test.go +28 -0
  19. data/ext/parquet/reader.go +89 -0
  20. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  21. data/ext/parquet/types.go +51 -0
  22. data/feedx.gemspec +5 -6
  23. data/feedx_test.go +2 -2
  24. data/format.go +45 -15
  25. data/format_test.go +4 -2
  26. data/go.mod +10 -5
  27. data/go.sum +90 -25
  28. data/internal/testdata/testdata.pb.go +176 -77
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression/abstract.rb +2 -2
  32. data/lib/feedx/compression/gzip.rb +2 -2
  33. data/lib/feedx/compression/none.rb +2 -2
  34. data/lib/feedx/consumer.rb +15 -9
  35. data/lib/feedx/format.rb +4 -1
  36. data/lib/feedx/producer.rb +27 -22
  37. data/lib/feedx/stream.rb +30 -13
  38. data/producer_test.go +2 -2
  39. data/reader_test.go +2 -2
  40. data/spec/feedx/cache/memory_spec.rb +2 -2
  41. data/spec/feedx/cache/value_spec.rb +1 -1
  42. data/spec/feedx/compression/gzip_spec.rb +1 -1
  43. data/spec/feedx/compression/none_spec.rb +1 -1
  44. data/spec/feedx/compression_spec.rb +2 -2
  45. data/spec/feedx/consumer_spec.rb +5 -4
  46. data/spec/feedx/format/abstract_spec.rb +2 -1
  47. data/spec/feedx/format/json_spec.rb +6 -6
  48. data/spec/feedx/format/parquet_spec.rb +1 -1
  49. data/spec/feedx/format/protobuf_spec.rb +1 -1
  50. data/spec/feedx/format_spec.rb +2 -2
  51. data/spec/feedx/producer_spec.rb +15 -8
  52. data/spec/feedx/stream_spec.rb +36 -18
  53. data/writer_test.go +2 -2
  54. metadata +24 -23
  55. data/.travis.yml +0 -24
@@ -0,0 +1,88 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "bytes"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+ "time"
9
+
10
+ "github.com/bsm/feedx"
11
+ "github.com/bsm/feedx/ext/parquet"
12
+ . "github.com/bsm/ginkgo"
13
+ . "github.com/bsm/gomega"
14
+ )
15
+
16
+ var _ = Describe("Decoder", func() {
17
+ var subject feedx.FormatDecoder
18
+ var fixture *os.File
19
+
20
+ f32ptr := func(f float32) *float32 { return &f }
21
+
22
+ BeforeEach(func() {
23
+ var err error
24
+ fixture, err = os.Open("testdata/alltypes_plain.parquet")
25
+ Expect(err).NotTo(HaveOccurred())
26
+
27
+ format := &parquet.Format{BatchSize: 3}
28
+ subject, err = format.NewDecoder(fixture)
29
+ Expect(err).NotTo(HaveOccurred())
30
+ })
31
+
32
+ AfterEach(func() {
33
+ Expect(subject.Close()).To(Succeed())
34
+ Expect(fixture.Close()).To(Succeed())
35
+ })
36
+
37
+ It("should decode", func() {
38
+ v1 := new(mockStruct)
39
+ Expect(subject.Decode(v1)).To(Succeed())
40
+ Expect(v1).To(Equal(&mockStruct{
41
+ ID: 4,
42
+ Bool: true,
43
+ Float: f32ptr(0),
44
+ DateString: "03/01/09", ByteString: []byte("0"),
45
+ Timestamp: time.Unix(1235865600, 0),
46
+ }))
47
+
48
+ v2 := new(mockStruct)
49
+ Expect(subject.Decode(v2)).To(Succeed())
50
+ Expect(v2).To(Equal(&mockStruct{
51
+ ID: 5,
52
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
53
+ Float: f32ptr(1.1), Double: 10.1,
54
+ DateString: "03/01/09", ByteString: []byte("1"),
55
+ Timestamp: time.Unix(1235865660, 0),
56
+ }))
57
+
58
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
59
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
60
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
61
+
62
+ v6 := new(mockStruct)
63
+ Expect(subject.Decode(v6)).To(Succeed())
64
+ Expect(v6).To(Equal(&mockStruct{
65
+ ID: 3,
66
+ Bool: false,
67
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
68
+ Float: f32ptr(1.1), Double: 10.1,
69
+ DateString: "02/01/09", ByteString: []byte("1"),
70
+ Timestamp: time.Unix(1233446460, 0),
71
+ }))
72
+
73
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
74
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
75
+
76
+ v9 := new(mockStruct)
77
+ Expect(subject.Decode(v9)).To(MatchError(io.EOF))
78
+ })
79
+
80
+ It("should open from non-file readers", func() {
81
+ bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
82
+ Expect(err).NotTo(HaveOccurred())
83
+
84
+ dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
85
+ Expect(err).NotTo(HaveOccurred())
86
+ Expect(dec.Close()).To(Succeed())
87
+ })
88
+ })
@@ -0,0 +1,10 @@
1
+ module github.com/bsm/feedx/ext/parquet
2
+
3
+ go 1.15
4
+
5
+ require (
6
+ github.com/bsm/feedx v0.0.0-00010101000000-000000000000
7
+ github.com/bsm/ginkgo v1.16.1
8
+ github.com/bsm/gomega v1.11.0
9
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00
10
+ )
@@ -0,0 +1,152 @@
1
+ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
2
+ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
3
+ github.com/bmatcuk/doublestar/v3 v3.0.0 h1:TQtVPlDnAYwcrVNB2JiGuMc++H5qzWZd9PhkNo5WyHI=
4
+ github.com/bmatcuk/doublestar/v3 v3.0.0/go.mod h1:6PcTVMw80pCY1RVuoqu3V++99uQB3vsSYKPTd8AWA0k=
5
+ github.com/bsm/bfs v0.11.3 h1:BTFCftgmuVZwwu6vyjhyKr/Pg1E+cZ5tLodj3wKxr94=
6
+ github.com/bsm/bfs v0.11.3/go.mod h1:sUhBrbc9g0XThRRrT9hiinMhhKbkKIdhLkFljk4fuzM=
7
+ github.com/bsm/ginkgo v1.16.0/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
8
+ github.com/bsm/ginkgo v1.16.1 h1:jp1v1dbmbGZDWmnGXDTN+XK3U1fTTNja9xYa7VBI0l0=
9
+ github.com/bsm/ginkgo v1.16.1/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
10
+ github.com/bsm/gomega v1.11.0 h1:wg9DVGPETNZLIbMsseneMV1a7uo/x+wsCyNXdEcifDI=
11
+ github.com/bsm/gomega v1.11.0/go.mod h1:JifAceMQ4crZIWYUKrlGcmbN3bqHogVTADMD2ATsbwk=
12
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
13
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
14
+ github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
15
+ github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
16
+ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
17
+ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
18
+ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
19
+ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
20
+ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
21
+ github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
22
+ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
23
+ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
24
+ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
25
+ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
26
+ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
27
+ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
28
+ github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
29
+ github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
30
+ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
31
+ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
32
+ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
33
+ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
34
+ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
35
+ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
36
+ github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
37
+ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
38
+ github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
39
+ github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
40
+ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
41
+ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
42
+ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
43
+ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
44
+ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
45
+ github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
46
+ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
47
+ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
48
+ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
49
+ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
50
+ github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
51
+ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
52
+ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
53
+ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
54
+ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
55
+ github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
56
+ github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
57
+ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
58
+ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
59
+ github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
60
+ github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
61
+ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
62
+ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
63
+ github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
64
+ github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
65
+ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
66
+ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
67
+ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
68
+ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
69
+ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
70
+ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
71
+ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
72
+ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
73
+ golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
74
+ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
75
+ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
76
+ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
77
+ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
78
+ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
79
+ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
80
+ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
81
+ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
82
+ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
83
+ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
84
+ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
85
+ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
86
+ golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
87
+ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
88
+ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
89
+ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
90
+ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
91
+ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
92
+ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
93
+ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
94
+ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
95
+ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
96
+ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
97
+ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
98
+ golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
99
+ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
100
+ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
101
+ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
102
+ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
103
+ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
104
+ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
105
+ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
106
+ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
107
+ golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
108
+ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
109
+ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
110
+ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
111
+ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
112
+ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
113
+ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
114
+ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
115
+ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
116
+ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
117
+ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
118
+ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
119
+ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
120
+ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
121
+ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
122
+ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
123
+ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
124
+ google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
125
+ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
126
+ google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
127
+ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
128
+ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
129
+ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
130
+ google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
131
+ google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
132
+ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
133
+ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
134
+ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
135
+ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
136
+ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
137
+ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
138
+ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
139
+ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
140
+ google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
141
+ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
142
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
143
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
144
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
145
+ gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
146
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
147
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
148
+ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
149
+ gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
150
+ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
151
+ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
152
+ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@@ -0,0 +1,78 @@
1
+ package parquet
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+
9
+ "github.com/bsm/feedx"
10
+ )
11
+
12
// Format is the parquet format.
type Format struct {
	// TempDir is the directory NewDecoder passes to copyToTempFile when its
	// input is not an io.ReadSeeker and has to be copied to a temporary file.
	TempDir string
	// Columns holds the column names to include.
	Columns []string
	// BatchSize is the batch size, default: 1,000.
	BatchSize int
}
18
+
19
+ // NewDecoder implements Format.
20
+ func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
21
+ if rs, ok := r.(io.ReadSeeker); ok {
22
+ return newDecoder(rs, f.Columns, f.BatchSize)
23
+ }
24
+
25
+ tmp, err := copyToTempFile(f.TempDir, r)
26
+ if err != nil {
27
+ return nil, err
28
+ }
29
+
30
+ dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
31
+ if err != nil {
32
+ _ = tmp.Close()
33
+ return nil, err
34
+ }
35
+ dec.closers = append(dec.closers, tmp)
36
+ return dec, nil
37
+ }
38
+
39
+ // NewEncoder implements Format.
40
+ func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
41
+ return nil, fmt.Errorf("not implemented")
42
+ }
43
+
44
+ // --------------------------------------------------------------------
45
+
46
// tempFile is an open file that deletes itself from disk when closed.
type tempFile struct{ *os.File }

// copyToTempFile writes everything read from r into a new temporary file
// inside dir and returns that file re-opened for reading. The file is removed
// again if copying, closing or re-opening fails.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	dst, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	path := dst.Name()

	// The write handle is closed whether or not the copy succeeded; a copy
	// error takes precedence over a close error.
	_, copyErr := io.Copy(dst, r)
	closeErr := dst.Close()
	if copyErr != nil {
		_ = os.Remove(path)
		return nil, copyErr
	}
	if closeErr != nil {
		_ = os.Remove(path)
		return nil, closeErr
	}

	src, err := os.Open(path)
	if err != nil {
		_ = os.Remove(path)
		return nil, err
	}
	return &tempFile{File: src}, nil
}

// Close closes the underlying file and then removes it. A removal error is
// reported in preference to a close error.
func (f tempFile) Close() error {
	closeErr := f.File.Close()
	if removeErr := os.Remove(f.Name()); removeErr != nil {
		return removeErr
	}
	return closeErr
}
@@ -0,0 +1,28 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "testing"
5
+ "time"
6
+
7
+ . "github.com/bsm/ginkgo"
8
+ . "github.com/bsm/gomega"
9
+ )
10
+
11
// mockStruct is the record type the specs decode into. Each field is bound to
// a column through its `parquet` struct tag.
type mockStruct struct {
	ID         int       `parquet:"id"`
	Bool       bool      `parquet:"bool_col"`
	TinyInt    int8      `parquet:"tinyint_col"`
	SmallUint  uint16    `parquet:"smallint_col"`
	StdInt     int       `parquet:"int_col"`
	BigInt     int64     `parquet:"bigint_col"`
	Float      *float32  `parquet:"float_col"` // pointer field
	Double     float64   `parquet:"double_col"`
	DateString string    `parquet:"date_string_col"`
	ByteString []byte    `parquet:"string_col"`
	Timestamp  time.Time `parquet:"timestamp_col"`
}
24
+
25
+ func TestSuite(t *testing.T) {
26
+ RegisterFailHandler(Fail)
27
+ RunSpecs(t, "feedx/ext/parquet")
28
+ }
@@ -0,0 +1,89 @@
1
+ package parquet
2
+
3
+ import (
4
+ "io"
5
+
6
+ kpq "github.com/bsm/parquet-go/parquet"
7
+ )
8
+
9
+ type columnReader struct {
10
+ file *kpq.File
11
+ col kpq.Column
12
+ rowGroup int
13
+
14
+ chunk *kpq.ColumnChunkReader
15
+ values []interface{}
16
+ dLevels []uint16
17
+ rLevels []uint16
18
+
19
+ n, i, vi int // chunk stats
20
+ }
21
+
22
+ func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
23
+ return &columnReader{
24
+ file: file,
25
+ col: col,
26
+ values: make([]interface{}, batchSize),
27
+ dLevels: make([]uint16, batchSize),
28
+ rLevels: make([]uint16, batchSize),
29
+ }
30
+ }
31
+
32
+ func (c *columnReader) Name() string {
33
+ return c.col.String()
34
+ }
35
+
36
+ func (c *columnReader) Next() (interface{}, error) {
37
+ if err := c.ensureChunk(); err != nil {
38
+ return nil, err
39
+ }
40
+
41
+ if err := c.ensureValues(); err == kpq.EndOfChunk {
42
+ c.chunk = nil
43
+ return c.Next()
44
+ } else if err != nil {
45
+ return nil, err
46
+ }
47
+
48
+ dLevel := c.dLevels[c.i]
49
+ c.i++
50
+
51
+ if notNull := dLevel == c.col.MaxD(); notNull {
52
+ val := c.values[c.vi]
53
+ c.vi++
54
+ return val, nil
55
+ }
56
+
57
+ return nil, nil
58
+ }
59
+
60
+ func (c *columnReader) ensureChunk() error {
61
+ if c.chunk != nil {
62
+ return nil
63
+ }
64
+ if c.rowGroup >= len(c.file.MetaData.RowGroups) {
65
+ return io.EOF
66
+ }
67
+
68
+ rd, err := c.file.NewReader(c.col, c.rowGroup)
69
+ if err != nil {
70
+ return err
71
+ }
72
+ c.chunk = rd
73
+ c.rowGroup++
74
+ return nil
75
+ }
76
+
77
+ func (c *columnReader) ensureValues() error {
78
+ if c.n != 0 && c.i < c.n {
79
+ return nil
80
+ }
81
+
82
+ n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
83
+ if err != nil {
84
+ return err
85
+ }
86
+
87
+ c.n, c.i, c.vi = n, 0, 0
88
+ return nil
89
+ }