feedx 0.10.2 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +3 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +2 -0
- data/.travis.yml +12 -2
- data/Gemfile +0 -2
- data/Gemfile.lock +50 -30
- data/Makefile +10 -5
- data/compression.go +18 -0
- data/compression_test.go +12 -0
- data/consumer_test.go +5 -4
- data/ext/parquet/decoder.go +170 -0
- data/ext/parquet/decoder_test.go +88 -0
- data/ext/parquet/go.mod +12 -0
- data/ext/parquet/go.sum +134 -0
- data/ext/parquet/parquet.go +78 -0
- data/ext/parquet/parquet_test.go +28 -0
- data/ext/parquet/reader.go +89 -0
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +51 -0
- data/feedx.gemspec +3 -2
- data/feedx_test.go +8 -24
- data/format.go +50 -20
- data/format_test.go +8 -6
- data/go.mod +9 -11
- data/go.sum +76 -28
- data/internal/testdata/testdata.pb.go +223 -0
- data/internal/testdata/testdata.proto +15 -0
- data/lib/feedx/cache/abstract.rb +2 -2
- data/lib/feedx/cache/memory.rb +1 -0
- data/lib/feedx/compression.rb +11 -4
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +14 -16
- data/lib/feedx/compression/none.rb +4 -4
- data/lib/feedx/consumer.rb +15 -9
- data/lib/feedx/format.rb +18 -9
- data/lib/feedx/format/abstract.rb +42 -13
- data/lib/feedx/format/json.rb +12 -8
- data/lib/feedx/format/parquet.rb +102 -0
- data/lib/feedx/format/protobuf.rb +16 -8
- data/lib/feedx/producer.rb +27 -22
- data/lib/feedx/stream.rb +36 -23
- data/producer_test.go +1 -2
- data/reader_test.go +6 -6
- data/spec/feedx/compression/gzip_spec.rb +2 -2
- data/spec/feedx/compression/none_spec.rb +2 -2
- data/spec/feedx/compression_spec.rb +9 -9
- data/spec/feedx/consumer_spec.rb +1 -1
- data/spec/feedx/format/abstract_spec.rb +11 -8
- data/spec/feedx/format/json_spec.rb +17 -16
- data/spec/feedx/format/parquet_spec.rb +30 -0
- data/spec/feedx/format/protobuf_spec.rb +12 -11
- data/spec/feedx/format_spec.rb +8 -8
- data/spec/feedx/producer_spec.rb +6 -0
- data/spec/feedx/stream_spec.rb +43 -6
- data/spec/spec_helper.rb +17 -1
- metadata +33 -5
@@ -0,0 +1,88 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"bytes"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
"time"
|
9
|
+
|
10
|
+
"github.com/bsm/feedx"
|
11
|
+
"github.com/bsm/feedx/ext/parquet"
|
12
|
+
. "github.com/onsi/ginkgo"
|
13
|
+
. "github.com/onsi/gomega"
|
14
|
+
)
|
15
|
+
|
16
|
+
var _ = Describe("Decoder", func() {
|
17
|
+
var subject feedx.FormatDecoder
|
18
|
+
var fixture *os.File
|
19
|
+
|
20
|
+
f32ptr := func(f float32) *float32 { return &f }
|
21
|
+
|
22
|
+
BeforeEach(func() {
|
23
|
+
var err error
|
24
|
+
fixture, err = os.Open("testdata/alltypes_plain.parquet")
|
25
|
+
Expect(err).NotTo(HaveOccurred())
|
26
|
+
|
27
|
+
format := &parquet.Format{BatchSize: 3}
|
28
|
+
subject, err = format.NewDecoder(fixture)
|
29
|
+
Expect(err).NotTo(HaveOccurred())
|
30
|
+
})
|
31
|
+
|
32
|
+
AfterEach(func() {
|
33
|
+
Expect(subject.Close()).To(Succeed())
|
34
|
+
Expect(fixture.Close()).To(Succeed())
|
35
|
+
})
|
36
|
+
|
37
|
+
It("should decode", func() {
|
38
|
+
v1 := new(mockStruct)
|
39
|
+
Expect(subject.Decode(v1)).To(Succeed())
|
40
|
+
Expect(v1).To(Equal(&mockStruct{
|
41
|
+
ID: 4,
|
42
|
+
Bool: true,
|
43
|
+
Float: f32ptr(0),
|
44
|
+
DateString: "03/01/09", ByteString: []byte("0"),
|
45
|
+
Timestamp: time.Unix(1235865600, 0),
|
46
|
+
}))
|
47
|
+
|
48
|
+
v2 := new(mockStruct)
|
49
|
+
Expect(subject.Decode(v2)).To(Succeed())
|
50
|
+
Expect(v2).To(Equal(&mockStruct{
|
51
|
+
ID: 5,
|
52
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
53
|
+
Float: f32ptr(1.1), Double: 10.1,
|
54
|
+
DateString: "03/01/09", ByteString: []byte("1"),
|
55
|
+
Timestamp: time.Unix(1235865660, 0),
|
56
|
+
}))
|
57
|
+
|
58
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
|
59
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
|
60
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
|
61
|
+
|
62
|
+
v6 := new(mockStruct)
|
63
|
+
Expect(subject.Decode(v6)).To(Succeed())
|
64
|
+
Expect(v6).To(Equal(&mockStruct{
|
65
|
+
ID: 3,
|
66
|
+
Bool: false,
|
67
|
+
TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
|
68
|
+
Float: f32ptr(1.1), Double: 10.1,
|
69
|
+
DateString: "02/01/09", ByteString: []byte("1"),
|
70
|
+
Timestamp: time.Unix(1233446460, 0),
|
71
|
+
}))
|
72
|
+
|
73
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
|
74
|
+
Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
|
75
|
+
|
76
|
+
v9 := new(mockStruct)
|
77
|
+
Expect(subject.Decode(v9)).To(MatchError(io.EOF))
|
78
|
+
})
|
79
|
+
|
80
|
+
It("should open from non-file readers", func() {
|
81
|
+
bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
|
82
|
+
Expect(err).NotTo(HaveOccurred())
|
83
|
+
|
84
|
+
dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
|
85
|
+
Expect(err).NotTo(HaveOccurred())
|
86
|
+
Expect(dec.Close()).To(Succeed())
|
87
|
+
})
|
88
|
+
})
|
data/ext/parquet/go.mod
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
module github.com/bsm/feedx/ext/parquet
|
2
|
+
|
3
|
+
go 1.15
|
4
|
+
|
5
|
+
replace github.com/bsm/feedx => ../../
|
6
|
+
|
7
|
+
require (
|
8
|
+
github.com/bsm/feedx v0.12.2
|
9
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00
|
10
|
+
github.com/onsi/ginkgo v1.14.1
|
11
|
+
github.com/onsi/gomega v1.10.2
|
12
|
+
)
|
data/ext/parquet/go.sum
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
2
|
+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
3
|
+
github.com/bmatcuk/doublestar v1.2.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
|
4
|
+
github.com/bmatcuk/doublestar v1.3.2 h1:mzUncgFmpzNUhIITFqGdZ8nUU0O7JTJzRO8VdkeLCSo=
|
5
|
+
github.com/bmatcuk/doublestar v1.3.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
|
6
|
+
github.com/bsm/bfs v0.10.4 h1:59I1FBEcIku/1MfPyIEeBfKm+ICaJ4lVEcago/YeCLg=
|
7
|
+
github.com/bsm/bfs v0.10.4/go.mod h1:N3md8kQvlteRDcfc8tqw759yW98dhj+6seWEVcg4CmM=
|
8
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
|
9
|
+
github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
|
10
|
+
github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
|
11
|
+
github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
|
12
|
+
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
13
|
+
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
14
|
+
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
15
|
+
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
16
|
+
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
17
|
+
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
|
18
|
+
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
19
|
+
github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
|
20
|
+
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
|
21
|
+
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
22
|
+
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
23
|
+
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
24
|
+
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
25
|
+
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
26
|
+
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
27
|
+
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
28
|
+
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
29
|
+
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
30
|
+
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
31
|
+
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
|
32
|
+
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
33
|
+
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
34
|
+
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
35
|
+
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
36
|
+
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
37
|
+
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
38
|
+
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
|
39
|
+
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
40
|
+
github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
|
41
|
+
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
42
|
+
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
43
|
+
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
44
|
+
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
45
|
+
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
46
|
+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
47
|
+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
48
|
+
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
49
|
+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
50
|
+
github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
|
51
|
+
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
52
|
+
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
53
|
+
github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
54
|
+
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
55
|
+
github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
|
56
|
+
github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
|
57
|
+
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
58
|
+
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
59
|
+
github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE=
|
60
|
+
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
61
|
+
github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
|
62
|
+
github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
63
|
+
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
64
|
+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
65
|
+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
66
|
+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
67
|
+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
68
|
+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
69
|
+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
70
|
+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
71
|
+
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
72
|
+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
73
|
+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
74
|
+
golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
75
|
+
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7 h1:AeiKBIuRw3UomYXSbLy0Mc2dDLfdtbT/IVn4keq83P0=
|
76
|
+
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
77
|
+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
78
|
+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
79
|
+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
80
|
+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
81
|
+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
82
|
+
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
83
|
+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
84
|
+
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
85
|
+
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
86
|
+
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
87
|
+
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
88
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
89
|
+
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299 h1:DYfZAGf2WMFjMxbgTjaC+2HC7NkNAQs+6Q8b9WEB/F4=
|
90
|
+
golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
91
|
+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
92
|
+
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
93
|
+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
94
|
+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
95
|
+
golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
96
|
+
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
97
|
+
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
98
|
+
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
99
|
+
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
100
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
101
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
102
|
+
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
103
|
+
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
104
|
+
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
105
|
+
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
106
|
+
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
107
|
+
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
108
|
+
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
109
|
+
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
110
|
+
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
111
|
+
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
112
|
+
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
113
|
+
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
114
|
+
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
115
|
+
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
116
|
+
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
|
117
|
+
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
118
|
+
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
119
|
+
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
|
120
|
+
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
121
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
122
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
123
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
124
|
+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
125
|
+
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
126
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
127
|
+
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
128
|
+
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
129
|
+
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
130
|
+
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
131
|
+
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
|
132
|
+
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
133
|
+
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
134
|
+
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
@@ -0,0 +1,78 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"fmt"
|
5
|
+
"io"
|
6
|
+
"io/ioutil"
|
7
|
+
"os"
|
8
|
+
|
9
|
+
"github.com/bsm/feedx"
|
10
|
+
)
|
11
|
+
|
12
|
+
// Format is a parquet format.
type Format struct {
	// TempDir is the directory handed to copyToTempFile when NewDecoder
	// has to spool a reader that is not an io.ReadSeeker.
	TempDir string

	// Columns holds the column names to include.
	Columns []string

	// BatchSize is the batch size, default: 1,000.
	BatchSize int
}
|
18
|
+
|
19
|
+
// NewDecoder implements Format.
|
20
|
+
func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
|
21
|
+
if rs, ok := r.(io.ReadSeeker); ok {
|
22
|
+
return newDecoder(rs, f.Columns, f.BatchSize)
|
23
|
+
}
|
24
|
+
|
25
|
+
tmp, err := copyToTempFile(f.TempDir, r)
|
26
|
+
if err != nil {
|
27
|
+
return nil, err
|
28
|
+
}
|
29
|
+
|
30
|
+
dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
|
31
|
+
if err != nil {
|
32
|
+
_ = tmp.Close()
|
33
|
+
return nil, err
|
34
|
+
}
|
35
|
+
dec.closers = append(dec.closers, tmp)
|
36
|
+
return dec, nil
|
37
|
+
}
|
38
|
+
|
39
|
+
// NewEncoder implements Format.
|
40
|
+
func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
|
41
|
+
return nil, fmt.Errorf("not implemented")
|
42
|
+
}
|
43
|
+
|
44
|
+
// --------------------------------------------------------------------
|
45
|
+
|
46
|
+
// tempFile is an open file that is deleted from disk when it is closed.
type tempFile struct{ *os.File }

// copyToTempFile writes everything readable from r into a new temporary file
// created in dir (ioutil.TempFile picks the location when dir is empty), then
// reopens that file read-only and returns it. On any failure the temporary
// file is removed again and the error is returned.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	spool, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	path := spool.Name()

	// discard deletes the temporary file (best effort) and passes the
	// original failure through unchanged.
	discard := func(cause error) (*tempFile, error) {
		_ = os.Remove(path)
		return nil, cause
	}

	if _, err = io.Copy(spool, r); err != nil {
		_ = spool.Close()
		return discard(err)
	}
	if err = spool.Close(); err != nil {
		return discard(err)
	}

	// Reopen so the returned handle is positioned at the start of the data.
	reopened, err := os.Open(path)
	if err != nil {
		return discard(err)
	}
	return &tempFile{File: reopened}, nil
}

// Close closes the underlying file and removes it from disk. A removal error
// takes precedence over a close error.
func (f tempFile) Close() error {
	closeErr := f.File.Close()
	if removeErr := os.Remove(f.Name()); removeErr != nil {
		return removeErr
	}
	return closeErr
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package parquet_test
|
2
|
+
|
3
|
+
import (
|
4
|
+
"testing"
|
5
|
+
"time"
|
6
|
+
|
7
|
+
. "github.com/onsi/ginkgo"
|
8
|
+
. "github.com/onsi/gomega"
|
9
|
+
)
|
10
|
+
|
11
|
+
// mockStruct is the record type the specs decode into. Every field carries a
// `parquet` tag naming the column it is bound to.
type mockStruct struct {
	// Identifier and boolean columns.
	ID   int  `parquet:"id"`
	Bool bool `parquet:"bool_col"`

	// Integer columns of assorted widths.
	TinyInt   int8   `parquet:"tinyint_col"`
	SmallUint uint16 `parquet:"smallint_col"`
	StdInt    int    `parquet:"int_col"`
	BigInt    int64  `parquet:"bigint_col"`

	// Floating point columns; Float is a pointer.
	Float  *float32 `parquet:"float_col"`
	Double float64  `parquet:"double_col"`

	// Textual, binary and time columns.
	DateString string    `parquet:"date_string_col"`
	ByteString []byte    `parquet:"string_col"`
	Timestamp  time.Time `parquet:"timestamp_col"`
}
|
24
|
+
|
25
|
+
func TestSuite(t *testing.T) {
|
26
|
+
RegisterFailHandler(Fail)
|
27
|
+
RunSpecs(t, "feedx/ext/parquet")
|
28
|
+
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"io"
|
5
|
+
|
6
|
+
kpq "github.com/bsm/parquet-go/parquet"
|
7
|
+
)
|
8
|
+
|
9
|
+
type columnReader struct {
|
10
|
+
file *kpq.File
|
11
|
+
col kpq.Column
|
12
|
+
rowGroup int
|
13
|
+
|
14
|
+
chunk *kpq.ColumnChunkReader
|
15
|
+
values []interface{}
|
16
|
+
dLevels []uint16
|
17
|
+
rLevels []uint16
|
18
|
+
|
19
|
+
n, i, vi int // chunk stats
|
20
|
+
}
|
21
|
+
|
22
|
+
func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
|
23
|
+
return &columnReader{
|
24
|
+
file: file,
|
25
|
+
col: col,
|
26
|
+
values: make([]interface{}, batchSize),
|
27
|
+
dLevels: make([]uint16, batchSize),
|
28
|
+
rLevels: make([]uint16, batchSize),
|
29
|
+
}
|
30
|
+
}
|
31
|
+
|
32
|
+
func (c *columnReader) Name() string {
|
33
|
+
return c.col.String()
|
34
|
+
}
|
35
|
+
|
36
|
+
func (c *columnReader) Next() (interface{}, error) {
|
37
|
+
if err := c.ensureChunk(); err != nil {
|
38
|
+
return nil, err
|
39
|
+
}
|
40
|
+
|
41
|
+
if err := c.ensureValues(); err == kpq.EndOfChunk {
|
42
|
+
c.chunk = nil
|
43
|
+
return c.Next()
|
44
|
+
} else if err != nil {
|
45
|
+
return nil, err
|
46
|
+
}
|
47
|
+
|
48
|
+
dLevel := c.dLevels[c.i]
|
49
|
+
c.i++
|
50
|
+
|
51
|
+
if notNull := dLevel == c.col.MaxD(); notNull {
|
52
|
+
val := c.values[c.vi]
|
53
|
+
c.vi++
|
54
|
+
return val, nil
|
55
|
+
}
|
56
|
+
|
57
|
+
return nil, nil
|
58
|
+
}
|
59
|
+
|
60
|
+
func (c *columnReader) ensureChunk() error {
|
61
|
+
if c.chunk != nil {
|
62
|
+
return nil
|
63
|
+
}
|
64
|
+
if c.rowGroup >= len(c.file.MetaData.RowGroups) {
|
65
|
+
return io.EOF
|
66
|
+
}
|
67
|
+
|
68
|
+
rd, err := c.file.NewReader(c.col, c.rowGroup)
|
69
|
+
if err != nil {
|
70
|
+
return err
|
71
|
+
}
|
72
|
+
c.chunk = rd
|
73
|
+
c.rowGroup++
|
74
|
+
return nil
|
75
|
+
}
|
76
|
+
|
77
|
+
func (c *columnReader) ensureValues() error {
|
78
|
+
if c.n != 0 && c.i < c.n {
|
79
|
+
return nil
|
80
|
+
}
|
81
|
+
|
82
|
+
n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
|
83
|
+
if err != nil {
|
84
|
+
return err
|
85
|
+
}
|
86
|
+
|
87
|
+
c.n, c.i, c.vi = n, 0, 0
|
88
|
+
return nil
|
89
|
+
}
|
Binary file
|
@@ -0,0 +1,51 @@
|
|
1
|
+
package parquet
|
2
|
+
|
3
|
+
import (
|
4
|
+
"reflect"
|
5
|
+
"strings"
|
6
|
+
"sync"
|
7
|
+
)
|
8
|
+
|
9
|
+
// structFields maps a field's parquet name (the name part of its `parquet`
// tag, or the Go field name when the tag gives none) to the field's index
// within the struct.
type structFields map[string]int

var fieldCache sync.Map // map[reflect.Type]structFields

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
//
// "Inspired" by https://golang.org/src/encoding/json/encode.go
// Copyright 2010 The Go Authors. All rights reserved.
func cachedTypeFields(t reflect.Type) structFields {
	if f, ok := fieldCache.Load(t); ok {
		return f.(structFields)
	}

	// LoadOrStore returns the already stored value if another goroutine
	// stored one first, so all callers end up sharing one map per type.
	f, _ := fieldCache.LoadOrStore(t, typeFields(t))
	return f.(structFields)
}

// tagName returns the name part of a struct tag value, i.e. everything before
// the first comma, or the whole value when it contains no comma.
func tagName(tag string) string {
	if pos := strings.Index(tag, ","); pos != -1 {
		return tag[:pos]
	}
	return tag
}

// typeFields builds the name-to-index mapping for struct type t.
//
// Fields tagged `parquet:"-"` are left out. Unexported fields are left out as
// well: reflection cannot assign to them, so listing them would only let a
// matching column name trigger a failure at decode time. When two fields
// resolve to the same name, the later field wins.
//
// t must be a struct type; reflect panics on NumField otherwise.
//
// "Inspired" by https://golang.org/src/encoding/json/encode.go
// Copyright 2010 The Go Authors. All rights reserved.
func typeFields(t reflect.Type) structFields {
	index := make(structFields, t.NumField())
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)

		// PkgPath is only set for unexported fields.
		if field.PkgPath != "" {
			continue
		}

		tag := field.Tag.Get("parquet")
		if tag == "-" {
			continue
		}

		name := field.Name
		if s := tagName(tag); s != "" {
			name = s
		}
		index[name] = i
	}
	return index
}
|