censive 0.19 → 0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff272ee20f4d6b15b65622fa444b494cd58fd01c651b6b6e3685539c0633299d
4
- data.tar.gz: e7a5890bf19b674fa63c8268c8f7a6895f9811984ade1368823ee273dc30aa01
3
+ metadata.gz: 5dffdaf597e038881e378eb30acb7c44cde08de1f9e40e2180076eaa11356c68
4
+ data.tar.gz: f9d7f77ac597a5d5a86fc1adcad430802ab20bd306bf5856f1191f57ff22f872
5
5
  SHA512:
6
- metadata.gz: 3156bc0abdf0b59d4f95be11e396f18421a17bf21b0e0cdc4c6e9b63f864ac9bb75a5db6480df07fba82633a11ce4b43dd24e917352f15f2db27de517a08f314
7
- data.tar.gz: cc688477d6b85152241fd6619cd58441c4805f1cef762a819de2e7a69cb4a5ba4f2b2a688a9e32590dbe32ef4c0fdbd796d8ffe2607ae635b192da04054ad571
6
+ metadata.gz: a0187489ebac8a9011f0f77dc9d52ca821ab080271f3eca6a1a40409b587534a9f4608d1f3b65a0253e587c242d01465e3cd773377f8d00b2fbd1723db4b5650
7
+ data.tar.gz: 94f2e7a204d8b40e058f41d193add0002d169d5d244e81c6895e465de159c6a953f09e313689891f7d12c05bead3baa41ad6fd525a8e297143758553e39ef1ba
data/censive.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "censive"
3
- s.version = "0.19"
3
+ s.version = "0.21"
4
4
  s.author = "Steve Shreeve"
5
5
  s.email = "steve.shreeve@gmail.com"
6
6
  s.summary =
Binary file
Binary file
@@ -0,0 +1,24 @@
1
+ digraph csv {
2
+ rankdir=LR;
3
+ node [ shape = point ];
4
+ ENTRY;
5
+ en_2;
6
+ eof_3;
7
+ node [ shape = circle, height = 0.2 ];
8
+ node [ fixedsize = true, height = 0.65, shape = doublecircle ];
9
+ 2;
10
+ 3;
11
+ node [ shape = circle ];
12
+ 1 -> 1 [ label = "-128..-1, 1..'!', '#'..127" ];
13
+ 1 -> 2 [ label = "'\"' / last2, initts" ];
14
+ 2 -> 2 [ label = "0 / ts, last5, initts" ];
15
+ 2 -> 2 [ label = "'\\n', '\\r' / ts, last4, initts" ];
16
+ 2 -> 1 [ label = "'\"' / ts" ];
17
+ 2 -> 2 [ label = "',' / ts, last3, initts" ];
18
+ 2 -> 3 [ label = "DEF / ts" ];
19
+ 3 -> 2 [ label = "0, '\\n', '\\r', '\"', ',' / next1, initts" ];
20
+ 3 -> 3 [ label = "DEF" ];
21
+ ENTRY -> 2 [ label = "IN" ];
22
+ en_2 -> 2 [ label = "csv_scan" ];
23
+ 3 -> eof_3 [ label = "EOF / next1" ];
24
+ }
data/diagram/csv.dot ADDED
@@ -0,0 +1,57 @@
1
+ digraph finite_state_machine {
2
+ rankdir=LR;
3
+ node [fontname="Helvetica,Arial,sans-serif", shape=circle, style=filled, fillcolor="#dddddd"];
4
+ edge [fontname="Helvetica,Arial,sans-serif"]
5
+
6
+ 1 [label="1: StartRow"];
7
+ 2 [label="2: InComment"];
8
+ 3 [label="3: StartColumn", shape=doublecircle, fillcolor="#ffdddd"];
9
+ 4 [label="4: InQuotedColumn"];
10
+ 5 [label="5: InDoubleEscapedQuote"];
11
+ 6 [label="6: InEscapedQuote"];
12
+ 7 [label="7: InColumn"];
13
+ 8 [label="8: EndColumnSeparator"];
14
+ 9 [label="9: EndColumnRow", shape=doublecircle, fillcolor="#ffdddd"];
15
+ 10 [label="10: InRowEnd", shape=doublecircle, fillcolor="#ffdddd"];
16
+ 11 [label="11: CRLF"];
17
+ 12 [label="12: EndRow"];
18
+
19
+ 1 -> 1 [label="eol / discard"];
20
+ 1 -> 2 [label="comment / discard"];
21
+ 1 -> 3 [label="* / ε"];
22
+
23
+ 2 -> 1 [label="LF / discard"];
24
+ 2 -> 2 [label="* / discard"];
25
+
26
+ 3 -> 4 [label="quote & @quoting / discard"];
27
+ 3 -> 7 [label="* / copyout"];
28
+ 3 -> 8 [label="sep / discard"];
29
+ 3 -> 9 [label="eol / ε"]
30
+
31
+ 4 -> 4 [label="* / copyout"];
32
+ 4 -> 5 [label="quote & @quoting / discard"];
33
+ 4 -> 6 [label="esc & @quoting / discard"];
34
+
35
+ 5 -> 4 [label="quote & @quoting & @double-quote / copyout"];
36
+ 5 -> 7 [label="* / copyout"];
37
+ 5 -> 8 [label="sep / discard"];
38
+ 5 -> 9 [label="eol / ε"]
39
+
40
+ 6 -> 4 [label="* / copyout"];
41
+
42
+ 7 -> 7 [label="* / copyout"];
43
+ 7 -> 8 [label="sep / discard"];
44
+ 7 -> 9 [label="eol / ε"]
45
+
46
+ 8 -> 3 [label="* / ε"];
47
+
48
+ 9 -> 10 [label="* / ε"];
49
+
50
+ 10 -> 11 [label="CR & @isCRLF / discard"];
51
+ 10 -> 12 [label="* / discard"];
52
+
53
+ 11 -> 1 [label="* / ε"];
54
+ 11 -> 1 [label="LF / discard"];
55
+
56
+ 12 -> 1 [label="* / ε"];
57
+ }
data/diagram/csv.png ADDED
Binary file
data/diagram/csv.rl ADDED
@@ -0,0 +1,45 @@
1
+ %%{
2
+ machine csv;
3
+
4
+ variable p s->p;
5
+ variable pe s->pe;
6
+ variable eof s->eof;
7
+ access s->;
8
+
9
+ EOF = 0;
10
+ EOL = [\r\n];
11
+ comma = [,];
12
+ string = [^,"\r\n\0]*;
13
+ quote = '"' [^"\0]* '"';
14
+
15
+ csv_scan := |*
16
+
17
+ string => {
18
+ return_token(TK_String);
19
+ fbreak;
20
+ };
21
+
22
+ quote => {
23
+ return_token(TK_Quote);
24
+ s->data += 1;
25
+ fbreak;
26
+ };
27
+
28
+ comma => {
29
+ return_token(TK_Comma);
30
+ fbreak;
31
+ };
32
+
33
+ EOL => {
34
+ s->curline += 1;
35
+ return_token(TK_EOL);
36
+ fbreak;
37
+ };
38
+
39
+ EOF => {
40
+ return_token(TK_EOF);
41
+ fbreak;
42
+ };
43
+
44
+ *|;
45
+ }%%
data/diagram/csv.svg ADDED
@@ -0,0 +1,270 @@
1
+ <svg width="1063" height="1078" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0.00 0.00 2351.40 964.49">
2
+ <g id="graph0" class="graph" transform="translate(4.0000488281250455,960.4899951171875) scale(1)">
3
+ <title>finite_state_machine</title>
4
+ <polygon fill="white" stroke="transparent" points="-4,4 -4,-960.49 2347.4,-960.49 2347.4,4 -4,4"/>
5
+ <!-- 1 -->
6
+ <g id="node1" class="node">
7
+ <title>1</title>
8
+ <ellipse fill="#dddddd" stroke="black" cx="57.44" cy="-723.59" rx="57.39" ry="57.39"/>
9
+ <text text-anchor="middle" x="57.44" y="-719.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">1: StartRow</text>
10
+ </g>
11
+ <!-- 1&#45;&gt;1 -->
12
+ <g id="edge1" class="edge">
13
+ <title>1-&gt;1</title>
14
+ <path fill="none" stroke="black" d="M34.14,-776.12C35.77,-789.23 43.54,-799.03 57.44,-799.03 67.55,-799.03 74.41,-793.86 78.03,-786"/>
15
+ <polygon fill="black" stroke="black" points="81.47,-786.69 80.75,-776.12 74.72,-784.84 81.47,-786.69"/>
16
+ <text text-anchor="middle" x="57.44" y="-803.23" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / discard</text>
17
+ </g>
18
+ <!-- 2 -->
19
+ <g id="node2" class="node">
20
+ <title>2</title>
21
+ <ellipse fill="#dddddd" stroke="black" cx="328.63" cy="-854.59" rx="67.19" ry="67.19"/>
22
+ <text text-anchor="middle" x="328.63" y="-850.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">2: InComment</text>
23
+ </g>
24
+ <!-- 1&#45;&gt;2 -->
25
+ <g id="edge2" class="edge">
26
+ <title>1-&gt;2</title>
27
+ <path fill="none" stroke="black" d="M84.65,-774.4C96.82,-793.08 113.04,-812.58 132.89,-824.59 168.22,-845.97 213.53,-853.86 251.55,-856.22"/>
28
+ <polygon fill="black" stroke="black" points="251.39,-859.72 261.56,-856.73 251.75,-852.73 251.39,-859.72"/>
29
+ <text text-anchor="middle" x="184.6" y="-858.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">comment / discard</text>
30
+ </g>
31
+ <!-- 3 -->
32
+ <g id="node3" class="node">
33
+ <title>3</title>
34
+ <ellipse fill="#ffdddd" stroke="black" cx="328.63" cy="-636.59" rx="70.15" ry="70.15"/>
35
+ <ellipse fill="none" stroke="black" cx="328.63" cy="-636.59" rx="74.14" ry="74.14"/>
36
+ <text text-anchor="middle" x="328.63" y="-632.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">3: StartColumn</text>
37
+ </g>
38
+ <!-- 1&#45;&gt;3 -->
39
+ <g id="edge3" class="edge">
40
+ <title>1-&gt;3</title>
41
+ <path fill="none" stroke="black" d="M112.27,-706.19C151.07,-693.65 204.07,-676.52 247.79,-662.39"/>
42
+ <polygon fill="black" stroke="black" points="249.1,-665.65 257.54,-659.24 246.95,-658.99 249.1,-665.65"/>
43
+ <text text-anchor="middle" x="184.6" y="-701.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
44
+ </g>
45
+ <!-- 2&#45;&gt;1 -->
46
+ <g id="edge4" class="edge">
47
+ <title>2-&gt;1</title>
48
+ <path fill="none" stroke="black" d="M270.63,-820.34C259.46,-814.36 247.69,-808.55 236.3,-803.79 192.13,-785.33 176.26,-793.89 132.89,-773.59 126.2,-770.46 119.43,-766.77 112.85,-762.84"/>
49
+ <polygon fill="black" stroke="black" points="114.35,-759.65 104.01,-757.36 110.66,-765.6 114.35,-759.65"/>
50
+ <text text-anchor="middle" x="184.6" y="-807.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">LF / discard</text>
51
+ </g>
52
+ <!-- 2&#45;&gt;2 -->
53
+ <g id="edge5" class="edge">
54
+ <title>2-&gt;2</title>
55
+ <path fill="none" stroke="black" d="M302.42,-916.55C304.98,-929.96 313.72,-939.69 328.63,-939.69 339.69,-939.69 347.35,-934.33 351.62,-926.08"/>
56
+ <polygon fill="black" stroke="black" points="354.95,-927.14 354.83,-916.55 348.32,-924.91 354.95,-927.14"/>
57
+ <text text-anchor="middle" x="328.63" y="-943.89" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / discard</text>
58
+ </g>
59
+ <!-- 4 -->
60
+ <g id="node4" class="node">
61
+ <title>4</title>
62
+ <ellipse fill="#dddddd" stroke="black" cx="683.49" cy="-434.59" rx="88.61" ry="88.61"/>
63
+ <text text-anchor="middle" x="683.49" y="-430.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">4: InQuotedColumn</text>
64
+ </g>
65
+ <!-- 3&#45;&gt;4 -->
66
+ <g id="edge6" class="edge">
67
+ <title>3-&gt;4</title>
68
+ <path fill="none" stroke="black" d="M393.53,-600.01C450.62,-567.33 534.7,-519.19 597.46,-483.27"/>
69
+ <polygon fill="black" stroke="black" points="599.23,-486.29 606.17,-478.28 595.75,-480.21 599.23,-486.29"/>
70
+ <text text-anchor="middle" x="498.94" y="-585.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote &amp; @quoting / discard</text>
71
+ </g>
72
+ <!-- 7 -->
73
+ <g id="node7" class="node">
74
+ <title>7</title>
75
+ <ellipse fill="#dddddd" stroke="black" cx="1463.55" cy="-539.59" rx="60.26" ry="60.26"/>
76
+ <text text-anchor="middle" x="1463.55" y="-535.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">7: InColumn</text>
77
+ </g>
78
+ <!-- 3&#45;&gt;7 -->
79
+ <g id="edge7" class="edge">
80
+ <title>3-&gt;7</title>
81
+ <path fill="none" stroke="black" d="M403.1,-636.36C598.59,-635.31 1128.2,-629.41 1299.77,-596.59 1333.2,-590.19 1369.05,-578.24 1398.57,-566.92"/>
82
+ <polygon fill="black" stroke="black" points="1399.87,-570.17 1407.92,-563.27 1397.33,-563.64 1399.87,-570.17"/>
83
+ <text text-anchor="middle" x="922.7" y="-633.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
84
+ </g>
85
+ <!-- 8 -->
86
+ <g id="node8" class="node">
87
+ <title>8</title>
88
+ <ellipse fill="#dddddd" stroke="black" cx="1734.3" cy="-615.59" rx="104.78" ry="104.78"/>
89
+ <text text-anchor="middle" x="1734.3" y="-611.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">8: EndColumnSeparator</text>
90
+ </g>
91
+ <!-- 3&#45;&gt;8 -->
92
+ <g id="edge8" class="edge">
93
+ <title>3-&gt;8</title>
94
+ <path fill="none" stroke="black" d="M396.5,-667.43C404.63,-670.5 412.9,-673.31 420.95,-675.59 534.03,-707.6 564.96,-714.59 682.49,-714.59 682.49,-714.59 682.49,-714.59 1464.55,-714.59 1523.06,-714.59 1584.57,-693.67 1633.95,-671.07"/>
95
+ <polygon fill="black" stroke="black" points="1635.44,-674.23 1643.03,-666.83 1632.48,-667.89 1635.44,-674.23"/>
96
+ <text text-anchor="middle" x="1186.56" y="-718.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
97
+ </g>
98
+ <!-- 9 -->
99
+ <g id="node9" class="node">
100
+ <title>9</title>
101
+ <ellipse fill="#ffdddd" stroke="black" cx="1734.3" cy="-358.59" rx="85.77" ry="85.77"/>
102
+ <ellipse fill="none" stroke="black" cx="1734.3" cy="-358.59" rx="89.77" ry="89.77"/>
103
+ <text text-anchor="middle" x="1734.3" y="-354.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">9: EndColumnRow</text>
104
+ </g>
105
+ <!-- 3&#45;&gt;9 -->
106
+ <g id="edge9" class="edge">
107
+ <title>3-&gt;9</title>
108
+ <path fill="none" stroke="black" d="M345.32,-563.91C379.9,-425.69 476.52,-138.59 682.49,-138.59 682.49,-138.59 682.49,-138.59 1464.55,-138.59 1554.14,-138.59 1630.26,-212.7 1678.37,-274.89"/>
109
+ <polygon fill="black" stroke="black" points="1675.84,-277.34 1684.68,-283.19 1681.42,-273.11 1675.84,-277.34"/>
110
+ <text text-anchor="middle" x="1186.56" y="-142.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
111
+ </g>
112
+ <!-- 4&#45;&gt;4 -->
113
+ <g id="edge10" class="edge">
114
+ <title>4-&gt;4</title>
115
+ <path fill="none" stroke="black" d="M651.28,-517.24C655.68,-531.36 666.42,-541.15 683.49,-541.15 696.56,-541.15 705.91,-535.41 711.55,-526.39"/>
116
+ <polygon fill="black" stroke="black" points="714.76,-527.8 715.69,-517.24 708.38,-524.91 714.76,-527.8"/>
117
+ <text text-anchor="middle" x="683.49" y="-545.35" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
118
+ </g>
119
+ <!-- 5 -->
120
+ <g id="node5" class="node">
121
+ <title>5</title>
122
+ <ellipse fill="#dddddd" stroke="black" cx="1186.56" cy="-474.59" rx="113.42" ry="113.42"/>
123
+ <text text-anchor="middle" x="1186.56" y="-470.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">5: InDoubleEscapedQuote</text>
124
+ </g>
125
+ <!-- 4&#45;&gt;5 -->
126
+ <g id="edge11" class="edge">
127
+ <title>4-&gt;5</title>
128
+ <path fill="none" stroke="black" d="M771.93,-427.87C847.42,-423.74 959.17,-421.61 1055.35,-435.79 1059.74,-436.44 1064.18,-437.2 1068.65,-438.05"/>
129
+ <polygon fill="black" stroke="black" points="1067.96,-441.48 1078.46,-440.06 1069.36,-434.63 1067.96,-441.48"/>
130
+ <text text-anchor="middle" x="922.7" y="-439.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote &amp; @quoting / discard</text>
131
+ </g>
132
+ <!-- 6 -->
133
+ <g id="node6" class="node">
134
+ <title>6</title>
135
+ <ellipse fill="#dddddd" stroke="black" cx="1186.56" cy="-258.59" rx="84.56" ry="84.56"/>
136
+ <text text-anchor="middle" x="1186.56" y="-254.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">6: InEscapedQuote</text>
137
+ </g>
138
+ <!-- 4&#45;&gt;6 -->
139
+ <g id="edge12" class="edge">
140
+ <title>4-&gt;6</title>
141
+ <path fill="none" stroke="black" d="M741.48,-367.37C756.03,-353.75 772.55,-340.84 790.04,-331.79 885,-282.64 1007.86,-266.1 1091.27,-260.74"/>
142
+ <polygon fill="black" stroke="black" points="1091.72,-264.22 1101.5,-260.13 1091.3,-257.23 1091.72,-264.22"/>
143
+ <text text-anchor="middle" x="922.7" y="-335.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">esc &amp; @quoting / discard</text>
144
+ </g>
145
+ <!-- 5&#45;&gt;4 -->
146
+ <g id="edge13" class="edge">
147
+ <title>5-&gt;4</title>
148
+ <path fill="none" stroke="black" d="M1073.18,-471.73C993.87,-468.88 885.2,-463.32 790.04,-452.59 787,-452.25 783.91,-451.87 780.8,-451.48"/>
149
+ <polygon fill="black" stroke="black" points="781.16,-447.99 770.78,-450.12 780.22,-454.93 781.16,-447.99"/>
150
+ <text text-anchor="middle" x="922.7" y="-474.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote &amp; @quoting &amp; @double-quote / copyout</text>
151
+ </g>
152
+ <!-- 5&#45;&gt;7 -->
153
+ <g id="edge14" class="edge">
154
+ <title>5-&gt;7</title>
155
+ <path fill="none" stroke="black" d="M1297.03,-500.47C1330.14,-508.29 1365.5,-516.65 1395.01,-523.63"/>
156
+ <polygon fill="black" stroke="black" points="1394.24,-527.04 1404.78,-525.93 1395.85,-520.23 1394.24,-527.04"/>
157
+ <text text-anchor="middle" x="1351.59" y="-523.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
158
+ </g>
159
+ <!-- 5&#45;&gt;8 -->
160
+ <g id="edge15" class="edge">
161
+ <title>5-&gt;8</title>
162
+ <path fill="none" stroke="black" d="M1298.72,-458.11C1405.89,-444.39 1559.29,-431.13 1611.66,-458.59 1637.2,-471.99 1659.03,-493.39 1676.86,-516.13"/>
163
+ <polygon fill="black" stroke="black" points="1674.16,-518.37 1682.99,-524.22 1679.74,-514.14 1674.16,-518.37"/>
164
+ <text text-anchor="middle" x="1463.55" y="-450.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
165
+ </g>
166
+ <!-- 5&#45;&gt;9 -->
167
+ <g id="edge16" class="edge">
168
+ <title>5-&gt;9</title>
169
+ <path fill="none" stroke="black" d="M1294.02,-438.19C1302.04,-435.58 1310.03,-433.02 1317.77,-430.59 1355.58,-418.73 1364.68,-414.13 1403.42,-405.79 1480.49,-389.21 1568.97,-376.9 1634.88,-369.02"/>
170
+ <polygon fill="black" stroke="black" points="1635.32,-372.5 1644.84,-367.85 1634.5,-365.54 1635.32,-372.5"/>
171
+ <text text-anchor="middle" x="1463.55" y="-409.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
172
+ </g>
173
+ <!-- 6&#45;&gt;4 -->
174
+ <g id="edge17" class="edge">
175
+ <title>6-&gt;4</title>
176
+ <path fill="none" stroke="black" d="M1119.98,-311.45C1100.16,-325.3 1077.7,-339.01 1055.35,-348.59 944.65,-396.04 906.9,-374.63 790.04,-403.79 786.19,-404.75 782.27,-405.76 778.32,-406.8"/>
177
+ <polygon fill="black" stroke="black" points="777.2,-403.48 768.44,-409.44 779.01,-410.24 777.2,-403.48"/>
178
+ <text text-anchor="middle" x="922.7" y="-407.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
179
+ </g>
180
+ <!-- 7&#45;&gt;7 -->
181
+ <g id="edge18" class="edge">
182
+ <title>7-&gt;7</title>
183
+ <path fill="none" stroke="black" d="M1443.53,-596.47C1445.49,-608.79 1452.16,-617.72 1463.55,-617.72 1471.56,-617.72 1477.23,-613.3 1480.58,-606.4"/>
184
+ <polygon fill="black" stroke="black" points="1484.03,-607.06 1483.57,-596.47 1477.33,-605.04 1484.03,-607.06"/>
185
+ <text text-anchor="middle" x="1463.55" y="-621.92" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
186
+ </g>
187
+ <!-- 7&#45;&gt;8 -->
188
+ <g id="edge19" class="edge">
189
+ <title>7-&gt;8</title>
190
+ <path fill="none" stroke="black" d="M1521.54,-555.71C1551.26,-564.11 1588.64,-574.68 1623.68,-584.59"/>
191
+ <polygon fill="black" stroke="black" points="1622.92,-588.01 1633.5,-587.37 1624.83,-581.28 1622.92,-588.01"/>
192
+ <text text-anchor="middle" x="1576.67" y="-583.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
193
+ </g>
194
+ <!-- 7&#45;&gt;9 -->
195
+ <g id="edge20" class="edge">
196
+ <title>7-&gt;9</title>
197
+ <path fill="none" stroke="black" d="M1513.84,-506.39C1552.07,-480.65 1605.89,-444.4 1650.67,-414.24"/>
198
+ <polygon fill="black" stroke="black" points="1652.87,-416.98 1659.21,-408.49 1648.96,-411.17 1652.87,-416.98"/>
199
+ <text text-anchor="middle" x="1576.67" y="-489.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
200
+ </g>
201
+ <!-- 8&#45;&gt;3 -->
202
+ <g id="edge21" class="edge">
203
+ <title>8-&gt;3</title>
204
+ <path fill="none" stroke="black" d="M1632.54,-641.15C1582.39,-651.73 1520.64,-661.59 1464.55,-661.59 682.49,-661.59 682.49,-661.59 682.49,-661.59 566.08,-661.59 536.92,-657.67 420.95,-647.59 418.28,-647.36 415.57,-647.11 412.84,-646.85"/>
205
+ <polygon fill="black" stroke="black" points="413.15,-643.36 402.85,-645.84 412.45,-650.33 413.15,-643.36"/>
206
+ <text text-anchor="middle" x="1186.56" y="-665.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
207
+ </g>
208
+ <!-- 10 -->
209
+ <g id="node10" class="node">
210
+ <title>10</title>
211
+ <ellipse fill="#ffdddd" stroke="black" cx="1971.46" cy="-292.59" rx="67.76" ry="67.76"/>
212
+ <ellipse fill="none" stroke="black" cx="1971.46" cy="-292.59" rx="71.77" ry="71.77"/>
213
+ <text text-anchor="middle" x="1971.46" y="-288.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">10: InRowEnd</text>
214
+ </g>
215
+ <!-- 9&#45;&gt;10 -->
216
+ <g id="edge22" class="edge">
217
+ <title>9-&gt;10</title>
218
+ <path fill="none" stroke="black" d="M1821.19,-334.48C1844.5,-327.94 1869.6,-320.9 1892.44,-314.49"/>
219
+ <polygon fill="black" stroke="black" points="1893.4,-317.85 1902.09,-311.78 1891.51,-311.11 1893.4,-317.85"/>
220
+ <text text-anchor="middle" x="1869.38" y="-327.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
221
+ </g>
222
+ <!-- 11 -->
223
+ <g id="node11" class="node">
224
+ <title>11</title>
225
+ <ellipse fill="#dddddd" stroke="black" cx="2283.81" cy="-292.59" rx="49.89" ry="49.89"/>
226
+ <text text-anchor="middle" x="2283.81" y="-288.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">11: CRLF</text>
227
+ </g>
228
+ <!-- 10&#45;&gt;11 -->
229
+ <g id="edge23" class="edge">
230
+ <title>10-&gt;11</title>
231
+ <path fill="none" stroke="black" d="M2043.27,-292.59C2097.67,-292.59 2171.93,-292.59 2223.29,-292.59"/>
232
+ <polygon fill="black" stroke="black" points="2223.56,-296.09 2233.56,-292.59 2223.56,-289.09 2223.56,-296.09"/>
233
+ <text text-anchor="middle" x="2133.66" y="-296.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">CR &amp; @isCRLF / discard</text>
234
+ </g>
235
+ <!-- 12 -->
236
+ <g id="node12" class="node">
237
+ <title>12</title>
238
+ <ellipse fill="#dddddd" stroke="black" cx="2283.81" cy="-59.59" rx="59.68" ry="59.68"/>
239
+ <text text-anchor="middle" x="2283.81" y="-55.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">12: EndRow</text>
240
+ </g>
241
+ <!-- 10&#45;&gt;12 -->
242
+ <g id="edge24" class="edge">
243
+ <title>10-&gt;12</title>
244
+ <path fill="none" stroke="black" d="M2029.38,-249.85C2085.76,-207.53 2171.52,-143.14 2227.38,-101.21"/>
245
+ <polygon fill="black" stroke="black" points="2229.5,-103.99 2235.39,-95.19 2225.29,-98.4 2229.5,-103.99"/>
246
+ <text text-anchor="middle" x="2133.66" y="-227.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / discard</text>
247
+ </g>
248
+ <!-- 11&#45;&gt;1 -->
249
+ <g id="edge25" class="edge">
250
+ <title>11-&gt;1</title>
251
+ <path fill="none" stroke="black" d="M2274.36,-341.68C2250.6,-461.58 2171.54,-759.59 1972.46,-759.59 327.63,-759.59 327.63,-759.59 327.63,-759.59 257.49,-759.59 177.99,-747.44 123.67,-737.25"/>
252
+ <polygon fill="black" stroke="black" points="124.31,-733.81 113.83,-735.38 123,-740.69 124.31,-733.81"/>
253
+ <text text-anchor="middle" x="1351.59" y="-763.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
254
+ </g>
255
+ <!-- 11&#45;&gt;1 -->
256
+ <g id="edge26" class="edge">
257
+ <title>11-&gt;1</title>
258
+ <path fill="none" stroke="black" d="M2274.91,-243.27C2265.69,-202.96 2246.26,-147.99 2206.22,-118.59 2121.97,-56.71 2077,-92.59 1972.46,-92.59 327.63,-92.59 327.63,-92.59 327.63,-92.59 205.79,-92.59 106.77,-495.61 71.91,-657.47"/>
259
+ <polygon fill="black" stroke="black" points="68.47,-656.8 69.81,-667.32 75.32,-658.27 68.47,-656.8"/>
260
+ <text text-anchor="middle" x="1351.59" y="-96.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">LF / discard</text>
261
+ </g>
262
+ <!-- 12&#45;&gt;1 -->
263
+ <g id="edge27" class="edge">
264
+ <title>12-&gt;1</title>
265
+ <path fill="none" stroke="black" d="M2224.19,-57.48C2161.79,-55.42 2060.17,-52.59 1972.46,-52.59 327.63,-52.59 327.63,-52.59 327.63,-52.59 227.11,-52.59 186.58,-82.62 132.89,-167.59 82.43,-247.45 65.7,-525.56 60.55,-655.6"/>
266
+ <polygon fill="black" stroke="black" points="57.04,-655.7 60.16,-665.83 64.04,-655.97 57.04,-655.7"/>
267
+ <text text-anchor="middle" x="1351.59" y="-56.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
268
+ </g>
269
+ </g>
270
+ </svg>
@@ -0,0 +1,26 @@
1
+ digraph csv {
2
+ rankdir=LR;
3
+ node [ shape = point ];
4
+ ENTRY;
5
+ en_4;
6
+ eof_5;
7
+ node [ shape = circle, height = 0.2 ];
8
+ node [ fixedsize = true, height = 0.65, shape = doublecircle ];
9
+ 4;
10
+ 5;
11
+ node [ shape = circle ];
12
+ 1 -> 2 [ label = "'?'" ];
13
+ 2 -> 4 [ label = "'\\n' / last4, initts" ];
14
+ 3 -> 3 [ label = "-128..-1, 1..'!', '#'..127" ];
15
+ 3 -> 4 [ label = "'\"' / last2, initts" ];
16
+ 4 -> 5 [ label = "-128..-1, 1..'\\t', '\\v'..'\\f', 14..'!', '#'..'+', '-'..127 / ts" ];
17
+ 4 -> 4 [ label = "0 / ts, last5, initts" ];
18
+ 4 -> 1 [ label = "'\\r' / ts" ];
19
+ 4 -> 3 [ label = "'\"' / ts" ];
20
+ 4 -> 4 [ label = "',' / ts, last3, initts" ];
21
+ 5 -> 4 [ label = "0, '\\n', '\\r', '\"', ',' / next1, initts" ];
22
+ 5 -> 5 [ label = "DEF" ];
23
+ ENTRY -> 4 [ label = "IN" ];
24
+ en_4 -> 4 [ label = "csv_scan" ];
25
+ 5 -> eof_5 [ label = "EOF / next1" ];
26
+ }
@@ -0,0 +1,50 @@
1
+ %%{
2
+ machine csv;
3
+
4
+ variable p s->p;
5
+ variable pe s->pe;
6
+ variable eof s->eof;
7
+ access s->;
8
+
9
+ eol = [\r\n];
10
+ comment = '#';
11
+ CR = "\r";
12
+ LF = "\n";
13
+
14
+ EOF = 0;
15
+ EOL = /\r?\n/;
16
+ comma = [,];
17
+ string = [^,"\r\n\0]*;
18
+ quote = '"' [^"\0]* '"';
19
+
20
+ csv_scan := |*
21
+
22
+ string => {
23
+ return_token(TK_String);
24
+ fbreak;
25
+ };
26
+
27
+ quote => {
28
+ return_token(TK_Quote);
29
+ s->data += 1;
30
+ fbreak;
31
+ };
32
+
33
+ comma => {
34
+ return_token(TK_Comma);
35
+ fbreak;
36
+ };
37
+
38
+ EOL => {
39
+ s->curline += 1;
40
+ return_token(TK_EOL);
41
+ fbreak;
42
+ };
43
+
44
+ EOF => {
45
+ return_token(TK_EOF);
46
+ fbreak;
47
+ };
48
+
49
+ *|;
50
+ }%%
data/lib/censive.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # censive - A quick and lightweight CSV handling library for Ruby
5
5
  #
6
6
  # Author: Steve Shreeve (steve.shreeve@gmail.com)
7
- # Date: Feb 5, 2023
7
+ # Date: Feb 14, 2023
8
8
  #
9
9
  # https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
10
10
  # https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
@@ -14,14 +14,22 @@
14
14
  # GOALS:
15
15
  # 1. Faster than Ruby's default CSV library
16
16
  # 2. Lightweight code with streamlined and optimized logic
17
- # 3. Support most non-compliant CSV variations (eg - @excel, @relax, @strip)
17
+ # 3. Support most non-compliant CSV variations (@excel, @relax, etc)
18
+ # 4. Support most commonly used CSV options (@sep, @quote, @strip, @drop, etc)
18
19
  #
19
- # TODO: Support IO streaming
20
+ # TODO:
21
+ # 1. Support IO streaming
22
+ # 2. Review all encodings, we may be losing speed when mixing encodings
23
+ # 3. Speedup possible if our @unquoted regex reads beyond @eol's
24
+ # 4. Will using String#freeze give us a speed up?
25
+ # 5. Implement support for scan_until(string) <= right now only regex is valid
20
26
  # ============================================================================
21
27
 
22
28
  require "strscan"
23
29
 
24
30
  class Censive < StringScanner
31
+ attr :encoding, :out
32
+
25
33
  def self.parse(...)
26
34
  new(...).parse
27
35
  end
@@ -34,78 +42,73 @@ class Censive < StringScanner
34
42
  end
35
43
  end
36
44
 
37
- def initialize(str="",
38
- drop: false , # drop trailing empty fields?
39
- encoding: "utf-8" , # character encoding
45
+ def initialize(str=nil,
46
+ drop: false , # drop trailing empty columns?
47
+ encoding: nil , # character encoding
40
48
  excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
41
49
  mode: :compact, # export mode: compact or full
42
- out: $stdout , # output stream, needs to respond to <<
50
+ out: nil , # output stream, needs to respond to <<
43
51
  quote: '"' , # quote character
44
52
  relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
45
53
  rowsep: "\n" , # row separator for export
46
54
  sep: "," , # column separator character
47
- strip: false , # strip fields when reading
48
- **opts # grab bag
55
+ strip: false , # strip columns when reading
56
+ **opts # grab bag
49
57
  )
50
- # data source
51
- str = File.open(str, "r:#{encoding}").read if !str[100] && File.readable?(str)
58
+ # initialize data source
59
+ if str && str.size < 100 && File.readable?(str)
60
+ str = File.open(str, encoding ? "r:#{encoding}" : "r").read
61
+ else
62
+ str ||= ""
63
+ str = str.encode(encoding) if encoding
64
+ end
52
65
  super(str)
53
66
  reset
54
67
 
55
- # options
68
+ # config options
69
+ @cheat = true
56
70
  @drop = drop
71
+ @encoding = str.encoding
57
72
  @excel = excel
58
73
  @mode = mode
59
- @out = out
60
- @quote = quote
74
+ @out = out || $stdout
61
75
  @relax = relax
76
+ @strip = strip
77
+
78
+ # config strings
79
+ @quote = quote
62
80
  @rowsep = rowsep
63
81
  @sep = sep
64
- @strip = strip
65
82
 
66
- # definitions
67
- @cr = "\r"
68
- @lf = "\n"
69
- @es = ""
70
- @eq = "="
71
- @esc = (@quote * 2)
72
- @eol = /#{@cr}#{@lf}?|#{@lf}|\z/o # end of line
73
- @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
83
+ # static strings
84
+ @cr = "\r"
85
+ @lf = "\n"
86
+ @es = ""
87
+ @eq = "="
88
+
89
+ # combinations
90
+ @esc = (@quote * 2)
91
+ @seq = [@sep, @eq].join # used for parsing in excel mode
92
+
93
+ # regexes
94
+ @eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
95
+ @eol = /#{@cr}#{@lf}?|#{@lf}/o # end of line
96
+ @escapes = /(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o
97
+ @quotable = /#{"\\"+@sep}|#{@cr}|#{@lf}/o
98
+ @quotes = /#{@quote}/o
99
+ @seps = /#{@sep}+/o
100
+ @quoted = @excel ? /(?:=)?#{@quote}/o : @quote
101
+ @unquoted = /[^#{@sep}#{@cr}#{@lf}][^#{@quote}#{@cr}#{@lf}]*/o
102
+ @leadzero = /\A0\d*\z/
74
103
  end
75
104
 
76
105
  def reset(str=nil)
77
- self.string = str if str
78
- super()
79
106
  @rows = nil
80
107
  @cols = @cells = 0
81
- end
82
-
83
- # ==[ Lexer ]==
84
108
 
85
- def next_token
86
- excel = true if @excel && scan(@eq)
87
-
88
- if scan(@quote) # consume quoted cell
89
- token = ""
90
- while true
91
- token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
92
- token << @quote and next if scan(@quote)
93
- break if scan(@eoc)
94
- @relax or bomb "invalid character after quote"
95
- token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
96
- end
97
- elsif scan(@sep) then return @es
98
- elsif scan(@eol) then return nil
99
- else # consume unquoted cell
100
- token = scan_until(@eoc) or bomb "unexpected character"
101
- token.prepend(@eq) if excel
102
- end
103
- scan(@sep)
104
- @strip ? token.strip : token
105
- end
106
-
107
- def bomb(msg)
108
- abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
109
+ self.string = str if str
110
+ @encoding = string.encoding
111
+ super()
109
112
  end
110
113
 
111
114
  # ==[ Parser ]==
@@ -122,18 +125,72 @@ class Censive < StringScanner
122
125
  end
123
126
 
124
127
  def next_row
128
+ if @cheat and line = scan_until(@eol)
129
+ row = line.chomp!.split(@sep, -1)
130
+ row.each do |col|
131
+ next if (saw = col.count(@quote)).zero?
132
+ next if (saw == 2) && col.delete_prefix!(@quote) && col.delete_suffix!(@quote)
133
+ @cheat = false
134
+ break
135
+ end if line.include?(@quote)
136
+ @cheat and return @strip ? row.each(&:strip!) : row
137
+ unscan
138
+ end
139
+
125
140
  token = next_token or return
126
- row = [token]
127
- row << token while token = next_token
141
+ row = []
142
+ row.push(*token)
143
+ row.push(*token) while token = next_token
128
144
  row
129
145
  end
130
146
 
147
+ def next_token
148
+ if scan(@quoted) # quoted cell
149
+ token = ""
150
+ while true
151
+ token << (scan_until(@quotes) or bomb "unclosed quote")[0..-2]
152
+ token << @quote and next if scan(@quote)
153
+ scan(@eoc) and break
154
+ @relax or bomb "invalid character after quote"
155
+ token << @quote + (scan_until(@quotes) or bomb "bad inline quote")
156
+ end
157
+ scan(@sep)
158
+ @strip ? token.strip : token
159
+ elsif match = scan(@unquoted) # unquoted cell(s)
160
+ if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
161
+ unless @excel && match.chomp!(@seq) # unless an excel literal, fix it
162
+ match << (scan_until(@eoc) or bomb "stray quote")
163
+ scan(@sep)
164
+ end
165
+ end
166
+ tokens = match.split(@sep, -1)
167
+ @strip ? tokens.map!(&:strip) : tokens
168
+ elsif scan(@sep)
169
+ match = scan(@seps)
170
+ match ? match.split(@sep, -1) : @es
171
+ else
172
+ scan(@eol)
173
+ nil
174
+ end
175
+ end
176
+
177
+ def each
178
+ @rows ||= parse
179
+ @rows.each {|row| yield row }
180
+ end
181
+
182
+ def export(**opts)
183
+ out = opts.empty? ? self : self.class.writer(**opts)
184
+ each {|row| out << row }
185
+ out.out
186
+ end
187
+
131
188
  # ==[ Helpers ]==
132
189
 
133
190
  # returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
134
191
  def grok(str)
135
- if idx = str.index(/(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o)
136
- $1 ? 2 : str.index(/#{@quote}/o, idx) ? 2 : 1
192
+ if idx = str.index(@escapes)
193
+ $1 ? 2 : str.index(@quotes, idx) ? 2 : 1
137
194
  else
138
195
  0
139
196
  end
@@ -153,11 +210,11 @@ class Censive < StringScanner
153
210
  row
154
211
  when 1
155
212
  row.map do |col|
156
- col.match?(/#{"\\"+@sep}|#{@cr}|#{@lf}/o) ? "#{q}#{col}#{q}" : col
213
+ col.match?(@quotable) ? "#{q}#{col}#{q}" : col
157
214
  end
158
215
  else
159
216
  row.map do |col|
160
- @excel && col =~ /\A0\d*\z/ ? "=#{q}#{col}#{q}" :
217
+ @excel && col =~ @leadzero ? "=#{q}#{col}#{q}" :
161
218
  case grok(col)
162
219
  when 0 then col
163
220
  when 1 then "#{q}#{col}#{q}"
@@ -168,7 +225,7 @@ class Censive < StringScanner
168
225
  when :full
169
226
  if @excel
170
227
  row.map do |col|
171
- col =~ /\A0\d*\z/ ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
228
+ col =~ @leadzero ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
172
229
  end
173
230
  else
174
231
  row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
@@ -178,16 +235,6 @@ class Censive < StringScanner
178
235
  @out << out + @rowsep
179
236
  end
180
237
 
181
- def each
182
- @rows ||= parse
183
- @rows.each {|row| yield row }
184
- end
185
-
186
- def export(**opts)
187
- out = opts.empty? ? self : self.class.writer(**opts)
188
- each {|row| out << row }
189
- end
190
-
191
238
  def stats
192
239
  wide = string.size.to_s.size
193
240
  puts "%#{wide}d rows" % @rows.size
@@ -195,27 +242,38 @@ class Censive < StringScanner
195
242
  puts "%#{wide}d cells" % @cells
196
243
  puts "%#{wide}d bytes" % string.size
197
244
  end
245
+
246
# Aborts the program with a parse error message written via Kernel#abort.
#
# The message contains the script name, the current character position, and
# an excerpt of up to 7 characters of input starting 4 characters before
# that position.
#
# @param msg [String] description of what went wrong
# @return never returns normally; Kernel#abort raises SystemExit
def bomb(msg)
  # charpos counts characters, matching String#[]; pos is a byte offset and
  # would point at the wrong place on multibyte input
  at = charpos
  # clamp at 0: a negative start would make String#[] count from the end
  from = [at - 4, 0].max
  abort "\n#{File.basename($0)}: #{msg} at character #{at} near '#{string[from, 7]}'"
end
198
249
  end
199
250
 
200
251
  if __FILE__ == $0
201
- raw = DATA.gets("\n\n").chomp
202
- # raw = File.read(ARGV.first || "lc-2023.csv")
203
- csv = Censive.new(raw, excel: true, relax: true)
204
- csv.export # (sep: ",", excel: true)
252
+ str = DATA.gets("\n\n").chomp
253
+ # str = File.read(ARGV.first || "lc-2023.csv")
254
+ # str = File.open("KEN_ALL.CSV", "r:cp932").read
255
+
256
+ # require "stringio"
257
+ # csv = Censive.new(str, excel: true, relax: true)
258
+ # out = "" # StringIO.new
259
+ # csv.export(out: out) # (excel: true) # sep: "|")
260
+ # puts out # .string
261
+
262
+ puts Censive.new(str, excel: true, relax: true, out: "").export
205
263
  end
206
264
 
207
265
  __END__
208
- Name,Age,Shoe
209
- Alice,27,5
266
+ "Don",="007",10,"Ed"
267
+ Name,Age,,,Shoe,,,
268
+ "Alice",27,5
210
269
  Bob,33,10 1/2
211
270
  Charlie or "Chuck",=B2 + B3,9
212
- "Doug E Fresh",="007",10
213
271
  Subtotal,=sum(B2:B5),="01234"
214
-
215
- # first line works in "relax" mode, bottom line is compliant
272
+ A,B,C,D
273
+ A,B,"C",D
274
+ A,B,C",D
275
+ A,B,"C",D
216
276
  123,"CHO, JOELLE "JOJO"",456
217
277
  123,"CHO, JOELLE ""JOJO""",456
218
-
219
- # Excel mode checking
220
278
  =,=x,x=,="x",="","","=",123,0123,="123",="0123"
221
- ,=x,x=,x,,,,,,=,,123,="0123",123,,="0123" # <= a little off
279
+ ,=x,x=,x,,,,,,=,,123,="0123",123,,="0123"
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Parses a CSV file with Censive and prints an MD5 digest of all parsed cells
# joined together, followed by the path and the file size, then a blank line.
#
# Usage: test-censive.rb [path]   (path defaults to KEN_ALL.CSV)

# Load censive.rb from this script's own directory, not the current directory.
require_relative "censive"
require "digest/md5"

path = ARGV[0] || "KEN_ALL.CSV"
# Paths starting with "ken" (any case) are read as CP932; others use the default.
mode = path.match?(/^ken/i) ? "r:cp932" : "r"

# File.read opens, reads and closes the file; File.open(...).read left it open.
data = File.read(path, mode: mode)
rows = Censive.parse(data)

puts "%s %s (%d size)" % [Digest::MD5.hexdigest(rows.join), path, File.stat(path).size], ""
data/lib/test-csv.rb ADDED
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Parses a CSV file with Ruby's CSV library and prints an MD5 digest of all
# parsed cells joined together, followed by the path and the file size, then
# a blank line.
#
# Usage: test-csv.rb [path]   (path defaults to KEN_ALL.CSV)

require "csv"
require "digest/md5"

path = ARGV[0] || "KEN_ALL.CSV"
# Paths starting with "ken" (any case) are read as CP932; others use the default.
mode = path.match?(/^ken/i) ? "r:cp932" : "r"

# File.read opens, reads and closes the file; File.open(...).read left it open.
data = File.read(path, mode: mode)
rows = CSV.parse(data)

puts "%s %s (%d size)" % [Digest::MD5.hexdigest(rows.join), path, File.stat(path).size], ""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: censive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.19'
4
+ version: '0.21'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-05 00:00:00.000000000 Z
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A quick and lightweight CSV handling library for Ruby
14
14
  email: steve.shreeve@gmail.com
@@ -19,7 +19,18 @@ files:
19
19
  - LICENSE
20
20
  - README.md
21
21
  - censive.gemspec
22
+ - diagram/NFA to Regex.pdf
23
+ - diagram/censive@ce9d51d.png
24
+ - diagram/csv-ragel.dot
25
+ - diagram/csv.dot
26
+ - diagram/csv.png
27
+ - diagram/csv.rl
28
+ - diagram/csv.svg
29
+ - diagram/diagram.dot
30
+ - diagram/diagram.rl
22
31
  - lib/censive.rb
32
+ - lib/test-censive.rb
33
+ - lib/test-csv.rb
23
34
  - test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv
24
35
  homepage: https://github.com/shreeve/censive
25
36
  licenses: