censive 0.19 → 0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/censive.gemspec +1 -1
- data/diagram/NFA to Regex.pdf +0 -0
- data/diagram/censive@ce9d51d.png +0 -0
- data/diagram/csv-ragel.dot +24 -0
- data/diagram/csv.dot +57 -0
- data/diagram/csv.png +0 -0
- data/diagram/csv.rl +45 -0
- data/diagram/csv.svg +270 -0
- data/diagram/diagram.dot +26 -0
- data/diagram/diagram.rl +50 -0
- data/lib/censive.rb +139 -81
- data/lib/test-censive.rb +12 -0
- data/lib/test-csv.rb +12 -0
- metadata +13 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5dffdaf597e038881e378eb30acb7c44cde08de1f9e40e2180076eaa11356c68
|
4
|
+
data.tar.gz: f9d7f77ac597a5d5a86fc1adcad430802ab20bd306bf5856f1191f57ff22f872
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0187489ebac8a9011f0f77dc9d52ca821ab080271f3eca6a1a40409b587534a9f4608d1f3b65a0253e587c242d01465e3cd773377f8d00b2fbd1723db4b5650
|
7
|
+
data.tar.gz: 94f2e7a204d8b40e058f41d193add0002d169d5d244e81c6895e465de159c6a953f09e313689891f7d12c05bead3baa41ad6fd525a8e297143758553e39ef1ba
|
data/censive.gemspec
CHANGED
Binary file
|
Binary file
|
@@ -0,0 +1,24 @@
|
|
1
|
+
digraph csv {
|
2
|
+
rankdir=LR;
|
3
|
+
node [ shape = point ];
|
4
|
+
ENTRY;
|
5
|
+
en_2;
|
6
|
+
eof_3;
|
7
|
+
node [ shape = circle, height = 0.2 ];
|
8
|
+
node [ fixedsize = true, height = 0.65, shape = doublecircle ];
|
9
|
+
2;
|
10
|
+
3;
|
11
|
+
node [ shape = circle ];
|
12
|
+
1 -> 1 [ label = "-128..-1, 1..'!', '#'..127" ];
|
13
|
+
1 -> 2 [ label = "'\"' / last2, initts" ];
|
14
|
+
2 -> 2 [ label = "0 / ts, last5, initts" ];
|
15
|
+
2 -> 2 [ label = "'\\n', '\\r' / ts, last4, initts" ];
|
16
|
+
2 -> 1 [ label = "'\"' / ts" ];
|
17
|
+
2 -> 2 [ label = "',' / ts, last3, initts" ];
|
18
|
+
2 -> 3 [ label = "DEF / ts" ];
|
19
|
+
3 -> 2 [ label = "0, '\\n', '\\r', '\"', ',' / next1, initts" ];
|
20
|
+
3 -> 3 [ label = "DEF" ];
|
21
|
+
ENTRY -> 2 [ label = "IN" ];
|
22
|
+
en_2 -> 2 [ label = "csv_scan" ];
|
23
|
+
3 -> eof_3 [ label = "EOF / next1" ];
|
24
|
+
}
|
data/diagram/csv.dot
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
digraph finite_state_machine {
|
2
|
+
rankdir=LR;
|
3
|
+
node [fontname="Helvetica,Arial,sans-serif", shape=circle, style=filled, fillcolor="#dddddd"];
|
4
|
+
edge [fontname="Helvetica,Arial,sans-serif"]
|
5
|
+
|
6
|
+
1 [label="1: StartRow"];
|
7
|
+
2 [label="2: InComment"];
|
8
|
+
3 [label="3: StartColumn", shape=doublecircle, fillcolor="#ffdddd"];
|
9
|
+
4 [label="4: InQuotedColumn"];
|
10
|
+
5 [label="5: InDoubleEscapedQuote"];
|
11
|
+
6 [label="6: InEscapedQuote"];
|
12
|
+
7 [label="7: InColumn"];
|
13
|
+
8 [label="8: EndColumnSeparator"];
|
14
|
+
9 [label="9: EndColumnRow", shape=doublecircle, fillcolor="#ffdddd"];
|
15
|
+
10 [label="10: InRowEnd", shape=doublecircle, fillcolor="#ffdddd"];
|
16
|
+
11 [label="11: CRLF"];
|
17
|
+
12 [label="12: EndRow"];
|
18
|
+
|
19
|
+
1 -> 1 [label="eol / discard"];
|
20
|
+
1 -> 2 [label="comment / discard"];
|
21
|
+
1 -> 3 [label="* / ε"];
|
22
|
+
|
23
|
+
2 -> 1 [label="LF / discard"];
|
24
|
+
2 -> 2 [label="* / discard"];
|
25
|
+
|
26
|
+
3 -> 4 [label="quote & @quoting / discard"];
|
27
|
+
3 -> 7 [label="* / copyout"];
|
28
|
+
3 -> 8 [label="sep / discard"];
|
29
|
+
3 -> 9 [label="eol / ε"]
|
30
|
+
|
31
|
+
4 -> 4 [label="* / copyout"];
|
32
|
+
4 -> 5 [label="quote & @quoting / discard"];
|
33
|
+
4 -> 6 [label="esc & @quoting / discard"];
|
34
|
+
|
35
|
+
5 -> 4 [label="quote & @quoting & @double-quote / copyout"];
|
36
|
+
5 -> 7 [label="* / copyout"];
|
37
|
+
5 -> 8 [label="sep / discard"];
|
38
|
+
5 -> 9 [label="eol / ε"]
|
39
|
+
|
40
|
+
6 -> 4 [label="* / copyout"];
|
41
|
+
|
42
|
+
7 -> 7 [label="* / copyout"];
|
43
|
+
7 -> 8 [label="sep / discard"];
|
44
|
+
7 -> 9 [label="eol / ε"]
|
45
|
+
|
46
|
+
8 -> 3 [label="* / ε"];
|
47
|
+
|
48
|
+
9 -> 10 [label="* / ε"];
|
49
|
+
|
50
|
+
10 -> 11 [label="CR & @isCRLF / discard"];
|
51
|
+
10 -> 12 [label="* / discard"];
|
52
|
+
|
53
|
+
11 -> 1 [label="* / ε"];
|
54
|
+
11 -> 1 [label="LF / discard"];
|
55
|
+
|
56
|
+
12 -> 1 [label="* / ε"];
|
57
|
+
}
|
data/diagram/csv.png
ADDED
Binary file
|
data/diagram/csv.rl
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
%%{
|
2
|
+
machine csv;
|
3
|
+
|
4
|
+
variable p s->p;
|
5
|
+
variable pe s->pe;
|
6
|
+
variable eof s->eof;
|
7
|
+
access s->;
|
8
|
+
|
9
|
+
EOF = 0;
|
10
|
+
EOL = [\r\n];
|
11
|
+
comma = [,];
|
12
|
+
string = [^,"\r\n\0]*;
|
13
|
+
quote = '"' [^"\0]* '"';
|
14
|
+
|
15
|
+
csv_scan := |*
|
16
|
+
|
17
|
+
string => {
|
18
|
+
return_token(TK_String);
|
19
|
+
fbreak;
|
20
|
+
};
|
21
|
+
|
22
|
+
quote => {
|
23
|
+
return_token(TK_Quote);
|
24
|
+
s->data += 1;
|
25
|
+
fbreak;
|
26
|
+
};
|
27
|
+
|
28
|
+
comma => {
|
29
|
+
return_token(TK_Comma);
|
30
|
+
fbreak;
|
31
|
+
};
|
32
|
+
|
33
|
+
EOL => {
|
34
|
+
s->curline += 1;
|
35
|
+
return_token(TK_EOL);
|
36
|
+
fbreak;
|
37
|
+
};
|
38
|
+
|
39
|
+
EOF => {
|
40
|
+
return_token(TK_EOF);
|
41
|
+
fbreak;
|
42
|
+
};
|
43
|
+
|
44
|
+
*|;
|
45
|
+
}%%
|
data/diagram/csv.svg
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
<svg width="1063" height="1078" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0.00 0.00 2351.40 964.49">
|
2
|
+
<g id="graph0" class="graph" transform="translate(4.0000488281250455,960.4899951171875) scale(1)">
|
3
|
+
<title>finite_state_machine</title>
|
4
|
+
<polygon fill="white" stroke="transparent" points="-4,4 -4,-960.49 2347.4,-960.49 2347.4,4 -4,4"/>
|
5
|
+
<!-- 1 -->
|
6
|
+
<g id="node1" class="node">
|
7
|
+
<title>1</title>
|
8
|
+
<ellipse fill="#dddddd" stroke="black" cx="57.44" cy="-723.59" rx="57.39" ry="57.39"/>
|
9
|
+
<text text-anchor="middle" x="57.44" y="-719.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">1: StartRow</text>
|
10
|
+
</g>
|
11
|
+
<!-- 1->1 -->
|
12
|
+
<g id="edge1" class="edge">
|
13
|
+
<title>1->1</title>
|
14
|
+
<path fill="none" stroke="black" d="M34.14,-776.12C35.77,-789.23 43.54,-799.03 57.44,-799.03 67.55,-799.03 74.41,-793.86 78.03,-786"/>
|
15
|
+
<polygon fill="black" stroke="black" points="81.47,-786.69 80.75,-776.12 74.72,-784.84 81.47,-786.69"/>
|
16
|
+
<text text-anchor="middle" x="57.44" y="-803.23" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / discard</text>
|
17
|
+
</g>
|
18
|
+
<!-- 2 -->
|
19
|
+
<g id="node2" class="node">
|
20
|
+
<title>2</title>
|
21
|
+
<ellipse fill="#dddddd" stroke="black" cx="328.63" cy="-854.59" rx="67.19" ry="67.19"/>
|
22
|
+
<text text-anchor="middle" x="328.63" y="-850.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">2: InComment</text>
|
23
|
+
</g>
|
24
|
+
<!-- 1->2 -->
|
25
|
+
<g id="edge2" class="edge">
|
26
|
+
<title>1->2</title>
|
27
|
+
<path fill="none" stroke="black" d="M84.65,-774.4C96.82,-793.08 113.04,-812.58 132.89,-824.59 168.22,-845.97 213.53,-853.86 251.55,-856.22"/>
|
28
|
+
<polygon fill="black" stroke="black" points="251.39,-859.72 261.56,-856.73 251.75,-852.73 251.39,-859.72"/>
|
29
|
+
<text text-anchor="middle" x="184.6" y="-858.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">comment / discard</text>
|
30
|
+
</g>
|
31
|
+
<!-- 3 -->
|
32
|
+
<g id="node3" class="node">
|
33
|
+
<title>3</title>
|
34
|
+
<ellipse fill="#ffdddd" stroke="black" cx="328.63" cy="-636.59" rx="70.15" ry="70.15"/>
|
35
|
+
<ellipse fill="none" stroke="black" cx="328.63" cy="-636.59" rx="74.14" ry="74.14"/>
|
36
|
+
<text text-anchor="middle" x="328.63" y="-632.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">3: StartColumn</text>
|
37
|
+
</g>
|
38
|
+
<!-- 1->3 -->
|
39
|
+
<g id="edge3" class="edge">
|
40
|
+
<title>1->3</title>
|
41
|
+
<path fill="none" stroke="black" d="M112.27,-706.19C151.07,-693.65 204.07,-676.52 247.79,-662.39"/>
|
42
|
+
<polygon fill="black" stroke="black" points="249.1,-665.65 257.54,-659.24 246.95,-658.99 249.1,-665.65"/>
|
43
|
+
<text text-anchor="middle" x="184.6" y="-701.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
|
44
|
+
</g>
|
45
|
+
<!-- 2->1 -->
|
46
|
+
<g id="edge4" class="edge">
|
47
|
+
<title>2->1</title>
|
48
|
+
<path fill="none" stroke="black" d="M270.63,-820.34C259.46,-814.36 247.69,-808.55 236.3,-803.79 192.13,-785.33 176.26,-793.89 132.89,-773.59 126.2,-770.46 119.43,-766.77 112.85,-762.84"/>
|
49
|
+
<polygon fill="black" stroke="black" points="114.35,-759.65 104.01,-757.36 110.66,-765.6 114.35,-759.65"/>
|
50
|
+
<text text-anchor="middle" x="184.6" y="-807.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">LF / discard</text>
|
51
|
+
</g>
|
52
|
+
<!-- 2->2 -->
|
53
|
+
<g id="edge5" class="edge">
|
54
|
+
<title>2->2</title>
|
55
|
+
<path fill="none" stroke="black" d="M302.42,-916.55C304.98,-929.96 313.72,-939.69 328.63,-939.69 339.69,-939.69 347.35,-934.33 351.62,-926.08"/>
|
56
|
+
<polygon fill="black" stroke="black" points="354.95,-927.14 354.83,-916.55 348.32,-924.91 354.95,-927.14"/>
|
57
|
+
<text text-anchor="middle" x="328.63" y="-943.89" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / discard</text>
|
58
|
+
</g>
|
59
|
+
<!-- 4 -->
|
60
|
+
<g id="node4" class="node">
|
61
|
+
<title>4</title>
|
62
|
+
<ellipse fill="#dddddd" stroke="black" cx="683.49" cy="-434.59" rx="88.61" ry="88.61"/>
|
63
|
+
<text text-anchor="middle" x="683.49" y="-430.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">4: InQuotedColumn</text>
|
64
|
+
</g>
|
65
|
+
<!-- 3->4 -->
|
66
|
+
<g id="edge6" class="edge">
|
67
|
+
<title>3->4</title>
|
68
|
+
<path fill="none" stroke="black" d="M393.53,-600.01C450.62,-567.33 534.7,-519.19 597.46,-483.27"/>
|
69
|
+
<polygon fill="black" stroke="black" points="599.23,-486.29 606.17,-478.28 595.75,-480.21 599.23,-486.29"/>
|
70
|
+
<text text-anchor="middle" x="498.94" y="-585.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote & @quoting / discard</text>
|
71
|
+
</g>
|
72
|
+
<!-- 7 -->
|
73
|
+
<g id="node7" class="node">
|
74
|
+
<title>7</title>
|
75
|
+
<ellipse fill="#dddddd" stroke="black" cx="1463.55" cy="-539.59" rx="60.26" ry="60.26"/>
|
76
|
+
<text text-anchor="middle" x="1463.55" y="-535.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">7: InColumn</text>
|
77
|
+
</g>
|
78
|
+
<!-- 3->7 -->
|
79
|
+
<g id="edge7" class="edge">
|
80
|
+
<title>3->7</title>
|
81
|
+
<path fill="none" stroke="black" d="M403.1,-636.36C598.59,-635.31 1128.2,-629.41 1299.77,-596.59 1333.2,-590.19 1369.05,-578.24 1398.57,-566.92"/>
|
82
|
+
<polygon fill="black" stroke="black" points="1399.87,-570.17 1407.92,-563.27 1397.33,-563.64 1399.87,-570.17"/>
|
83
|
+
<text text-anchor="middle" x="922.7" y="-633.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
|
84
|
+
</g>
|
85
|
+
<!-- 8 -->
|
86
|
+
<g id="node8" class="node">
|
87
|
+
<title>8</title>
|
88
|
+
<ellipse fill="#dddddd" stroke="black" cx="1734.3" cy="-615.59" rx="104.78" ry="104.78"/>
|
89
|
+
<text text-anchor="middle" x="1734.3" y="-611.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">8: EndColumnSeparator</text>
|
90
|
+
</g>
|
91
|
+
<!-- 3->8 -->
|
92
|
+
<g id="edge8" class="edge">
|
93
|
+
<title>3->8</title>
|
94
|
+
<path fill="none" stroke="black" d="M396.5,-667.43C404.63,-670.5 412.9,-673.31 420.95,-675.59 534.03,-707.6 564.96,-714.59 682.49,-714.59 682.49,-714.59 682.49,-714.59 1464.55,-714.59 1523.06,-714.59 1584.57,-693.67 1633.95,-671.07"/>
|
95
|
+
<polygon fill="black" stroke="black" points="1635.44,-674.23 1643.03,-666.83 1632.48,-667.89 1635.44,-674.23"/>
|
96
|
+
<text text-anchor="middle" x="1186.56" y="-718.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
|
97
|
+
</g>
|
98
|
+
<!-- 9 -->
|
99
|
+
<g id="node9" class="node">
|
100
|
+
<title>9</title>
|
101
|
+
<ellipse fill="#ffdddd" stroke="black" cx="1734.3" cy="-358.59" rx="85.77" ry="85.77"/>
|
102
|
+
<ellipse fill="none" stroke="black" cx="1734.3" cy="-358.59" rx="89.77" ry="89.77"/>
|
103
|
+
<text text-anchor="middle" x="1734.3" y="-354.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">9: EndColumnRow</text>
|
104
|
+
</g>
|
105
|
+
<!-- 3->9 -->
|
106
|
+
<g id="edge9" class="edge">
|
107
|
+
<title>3->9</title>
|
108
|
+
<path fill="none" stroke="black" d="M345.32,-563.91C379.9,-425.69 476.52,-138.59 682.49,-138.59 682.49,-138.59 682.49,-138.59 1464.55,-138.59 1554.14,-138.59 1630.26,-212.7 1678.37,-274.89"/>
|
109
|
+
<polygon fill="black" stroke="black" points="1675.84,-277.34 1684.68,-283.19 1681.42,-273.11 1675.84,-277.34"/>
|
110
|
+
<text text-anchor="middle" x="1186.56" y="-142.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
|
111
|
+
</g>
|
112
|
+
<!-- 4->4 -->
|
113
|
+
<g id="edge10" class="edge">
|
114
|
+
<title>4->4</title>
|
115
|
+
<path fill="none" stroke="black" d="M651.28,-517.24C655.68,-531.36 666.42,-541.15 683.49,-541.15 696.56,-541.15 705.91,-535.41 711.55,-526.39"/>
|
116
|
+
<polygon fill="black" stroke="black" points="714.76,-527.8 715.69,-517.24 708.38,-524.91 714.76,-527.8"/>
|
117
|
+
<text text-anchor="middle" x="683.49" y="-545.35" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
|
118
|
+
</g>
|
119
|
+
<!-- 5 -->
|
120
|
+
<g id="node5" class="node">
|
121
|
+
<title>5</title>
|
122
|
+
<ellipse fill="#dddddd" stroke="black" cx="1186.56" cy="-474.59" rx="113.42" ry="113.42"/>
|
123
|
+
<text text-anchor="middle" x="1186.56" y="-470.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">5: InDoubleEscapedQuote</text>
|
124
|
+
</g>
|
125
|
+
<!-- 4->5 -->
|
126
|
+
<g id="edge11" class="edge">
|
127
|
+
<title>4->5</title>
|
128
|
+
<path fill="none" stroke="black" d="M771.93,-427.87C847.42,-423.74 959.17,-421.61 1055.35,-435.79 1059.74,-436.44 1064.18,-437.2 1068.65,-438.05"/>
|
129
|
+
<polygon fill="black" stroke="black" points="1067.96,-441.48 1078.46,-440.06 1069.36,-434.63 1067.96,-441.48"/>
|
130
|
+
<text text-anchor="middle" x="922.7" y="-439.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote & @quoting / discard</text>
|
131
|
+
</g>
|
132
|
+
<!-- 6 -->
|
133
|
+
<g id="node6" class="node">
|
134
|
+
<title>6</title>
|
135
|
+
<ellipse fill="#dddddd" stroke="black" cx="1186.56" cy="-258.59" rx="84.56" ry="84.56"/>
|
136
|
+
<text text-anchor="middle" x="1186.56" y="-254.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">6: InEscapedQuote</text>
|
137
|
+
</g>
|
138
|
+
<!-- 4->6 -->
|
139
|
+
<g id="edge12" class="edge">
|
140
|
+
<title>4->6</title>
|
141
|
+
<path fill="none" stroke="black" d="M741.48,-367.37C756.03,-353.75 772.55,-340.84 790.04,-331.79 885,-282.64 1007.86,-266.1 1091.27,-260.74"/>
|
142
|
+
<polygon fill="black" stroke="black" points="1091.72,-264.22 1101.5,-260.13 1091.3,-257.23 1091.72,-264.22"/>
|
143
|
+
<text text-anchor="middle" x="922.7" y="-335.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">esc & @quoting / discard</text>
|
144
|
+
</g>
|
145
|
+
<!-- 5->4 -->
|
146
|
+
<g id="edge13" class="edge">
|
147
|
+
<title>5->4</title>
|
148
|
+
<path fill="none" stroke="black" d="M1073.18,-471.73C993.87,-468.88 885.2,-463.32 790.04,-452.59 787,-452.25 783.91,-451.87 780.8,-451.48"/>
|
149
|
+
<polygon fill="black" stroke="black" points="781.16,-447.99 770.78,-450.12 780.22,-454.93 781.16,-447.99"/>
|
150
|
+
<text text-anchor="middle" x="922.7" y="-474.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">quote & @quoting & @double-quote / copyout</text>
|
151
|
+
</g>
|
152
|
+
<!-- 5->7 -->
|
153
|
+
<g id="edge14" class="edge">
|
154
|
+
<title>5->7</title>
|
155
|
+
<path fill="none" stroke="black" d="M1297.03,-500.47C1330.14,-508.29 1365.5,-516.65 1395.01,-523.63"/>
|
156
|
+
<polygon fill="black" stroke="black" points="1394.24,-527.04 1404.78,-525.93 1395.85,-520.23 1394.24,-527.04"/>
|
157
|
+
<text text-anchor="middle" x="1351.59" y="-523.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
|
158
|
+
</g>
|
159
|
+
<!-- 5->8 -->
|
160
|
+
<g id="edge15" class="edge">
|
161
|
+
<title>5->8</title>
|
162
|
+
<path fill="none" stroke="black" d="M1298.72,-458.11C1405.89,-444.39 1559.29,-431.13 1611.66,-458.59 1637.2,-471.99 1659.03,-493.39 1676.86,-516.13"/>
|
163
|
+
<polygon fill="black" stroke="black" points="1674.16,-518.37 1682.99,-524.22 1679.74,-514.14 1674.16,-518.37"/>
|
164
|
+
<text text-anchor="middle" x="1463.55" y="-450.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
|
165
|
+
</g>
|
166
|
+
<!-- 5->9 -->
|
167
|
+
<g id="edge16" class="edge">
|
168
|
+
<title>5->9</title>
|
169
|
+
<path fill="none" stroke="black" d="M1294.02,-438.19C1302.04,-435.58 1310.03,-433.02 1317.77,-430.59 1355.58,-418.73 1364.68,-414.13 1403.42,-405.79 1480.49,-389.21 1568.97,-376.9 1634.88,-369.02"/>
|
170
|
+
<polygon fill="black" stroke="black" points="1635.32,-372.5 1644.84,-367.85 1634.5,-365.54 1635.32,-372.5"/>
|
171
|
+
<text text-anchor="middle" x="1463.55" y="-409.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
|
172
|
+
</g>
|
173
|
+
<!-- 6->4 -->
|
174
|
+
<g id="edge17" class="edge">
|
175
|
+
<title>6->4</title>
|
176
|
+
<path fill="none" stroke="black" d="M1119.98,-311.45C1100.16,-325.3 1077.7,-339.01 1055.35,-348.59 944.65,-396.04 906.9,-374.63 790.04,-403.79 786.19,-404.75 782.27,-405.76 778.32,-406.8"/>
|
177
|
+
<polygon fill="black" stroke="black" points="777.2,-403.48 768.44,-409.44 779.01,-410.24 777.2,-403.48"/>
|
178
|
+
<text text-anchor="middle" x="922.7" y="-407.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
|
179
|
+
</g>
|
180
|
+
<!-- 7->7 -->
|
181
|
+
<g id="edge18" class="edge">
|
182
|
+
<title>7->7</title>
|
183
|
+
<path fill="none" stroke="black" d="M1443.53,-596.47C1445.49,-608.79 1452.16,-617.72 1463.55,-617.72 1471.56,-617.72 1477.23,-613.3 1480.58,-606.4"/>
|
184
|
+
<polygon fill="black" stroke="black" points="1484.03,-607.06 1483.57,-596.47 1477.33,-605.04 1484.03,-607.06"/>
|
185
|
+
<text text-anchor="middle" x="1463.55" y="-621.92" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / copyout</text>
|
186
|
+
</g>
|
187
|
+
<!-- 7->8 -->
|
188
|
+
<g id="edge19" class="edge">
|
189
|
+
<title>7->8</title>
|
190
|
+
<path fill="none" stroke="black" d="M1521.54,-555.71C1551.26,-564.11 1588.64,-574.68 1623.68,-584.59"/>
|
191
|
+
<polygon fill="black" stroke="black" points="1622.92,-588.01 1633.5,-587.37 1624.83,-581.28 1622.92,-588.01"/>
|
192
|
+
<text text-anchor="middle" x="1576.67" y="-583.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">sep / discard</text>
|
193
|
+
</g>
|
194
|
+
<!-- 7->9 -->
|
195
|
+
<g id="edge20" class="edge">
|
196
|
+
<title>7->9</title>
|
197
|
+
<path fill="none" stroke="black" d="M1513.84,-506.39C1552.07,-480.65 1605.89,-444.4 1650.67,-414.24"/>
|
198
|
+
<polygon fill="black" stroke="black" points="1652.87,-416.98 1659.21,-408.49 1648.96,-411.17 1652.87,-416.98"/>
|
199
|
+
<text text-anchor="middle" x="1576.67" y="-489.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">eol / ε</text>
|
200
|
+
</g>
|
201
|
+
<!-- 8->3 -->
|
202
|
+
<g id="edge21" class="edge">
|
203
|
+
<title>8->3</title>
|
204
|
+
<path fill="none" stroke="black" d="M1632.54,-641.15C1582.39,-651.73 1520.64,-661.59 1464.55,-661.59 682.49,-661.59 682.49,-661.59 682.49,-661.59 566.08,-661.59 536.92,-657.67 420.95,-647.59 418.28,-647.36 415.57,-647.11 412.84,-646.85"/>
|
205
|
+
<polygon fill="black" stroke="black" points="413.15,-643.36 402.85,-645.84 412.45,-650.33 413.15,-643.36"/>
|
206
|
+
<text text-anchor="middle" x="1186.56" y="-665.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
|
207
|
+
</g>
|
208
|
+
<!-- 10 -->
|
209
|
+
<g id="node10" class="node">
|
210
|
+
<title>10</title>
|
211
|
+
<ellipse fill="#ffdddd" stroke="black" cx="1971.46" cy="-292.59" rx="67.76" ry="67.76"/>
|
212
|
+
<ellipse fill="none" stroke="black" cx="1971.46" cy="-292.59" rx="71.77" ry="71.77"/>
|
213
|
+
<text text-anchor="middle" x="1971.46" y="-288.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">10: InRowEnd</text>
|
214
|
+
</g>
|
215
|
+
<!-- 9->10 -->
|
216
|
+
<g id="edge22" class="edge">
|
217
|
+
<title>9->10</title>
|
218
|
+
<path fill="none" stroke="black" d="M1821.19,-334.48C1844.5,-327.94 1869.6,-320.9 1892.44,-314.49"/>
|
219
|
+
<polygon fill="black" stroke="black" points="1893.4,-317.85 1902.09,-311.78 1891.51,-311.11 1893.4,-317.85"/>
|
220
|
+
<text text-anchor="middle" x="1869.38" y="-327.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
|
221
|
+
</g>
|
222
|
+
<!-- 11 -->
|
223
|
+
<g id="node11" class="node">
|
224
|
+
<title>11</title>
|
225
|
+
<ellipse fill="#dddddd" stroke="black" cx="2283.81" cy="-292.59" rx="49.89" ry="49.89"/>
|
226
|
+
<text text-anchor="middle" x="2283.81" y="-288.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">11: CRLF</text>
|
227
|
+
</g>
|
228
|
+
<!-- 10->11 -->
|
229
|
+
<g id="edge23" class="edge">
|
230
|
+
<title>10->11</title>
|
231
|
+
<path fill="none" stroke="black" d="M2043.27,-292.59C2097.67,-292.59 2171.93,-292.59 2223.29,-292.59"/>
|
232
|
+
<polygon fill="black" stroke="black" points="2223.56,-296.09 2233.56,-292.59 2223.56,-289.09 2223.56,-296.09"/>
|
233
|
+
<text text-anchor="middle" x="2133.66" y="-296.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">CR & @isCRLF / discard</text>
|
234
|
+
</g>
|
235
|
+
<!-- 12 -->
|
236
|
+
<g id="node12" class="node">
|
237
|
+
<title>12</title>
|
238
|
+
<ellipse fill="#dddddd" stroke="black" cx="2283.81" cy="-59.59" rx="59.68" ry="59.68"/>
|
239
|
+
<text text-anchor="middle" x="2283.81" y="-55.39" font-family="Helvetica,Arial,sans-serif" font-size="14.00">12: EndRow</text>
|
240
|
+
</g>
|
241
|
+
<!-- 10->12 -->
|
242
|
+
<g id="edge24" class="edge">
|
243
|
+
<title>10->12</title>
|
244
|
+
<path fill="none" stroke="black" d="M2029.38,-249.85C2085.76,-207.53 2171.52,-143.14 2227.38,-101.21"/>
|
245
|
+
<polygon fill="black" stroke="black" points="2229.5,-103.99 2235.39,-95.19 2225.29,-98.4 2229.5,-103.99"/>
|
246
|
+
<text text-anchor="middle" x="2133.66" y="-227.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / discard</text>
|
247
|
+
</g>
|
248
|
+
<!-- 11->1 -->
|
249
|
+
<g id="edge25" class="edge">
|
250
|
+
<title>11->1</title>
|
251
|
+
<path fill="none" stroke="black" d="M2274.36,-341.68C2250.6,-461.58 2171.54,-759.59 1972.46,-759.59 327.63,-759.59 327.63,-759.59 327.63,-759.59 257.49,-759.59 177.99,-747.44 123.67,-737.25"/>
|
252
|
+
<polygon fill="black" stroke="black" points="124.31,-733.81 113.83,-735.38 123,-740.69 124.31,-733.81"/>
|
253
|
+
<text text-anchor="middle" x="1351.59" y="-763.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
|
254
|
+
</g>
|
255
|
+
<!-- 11->1 -->
|
256
|
+
<g id="edge26" class="edge">
|
257
|
+
<title>11->1</title>
|
258
|
+
<path fill="none" stroke="black" d="M2274.91,-243.27C2265.69,-202.96 2246.26,-147.99 2206.22,-118.59 2121.97,-56.71 2077,-92.59 1972.46,-92.59 327.63,-92.59 327.63,-92.59 327.63,-92.59 205.79,-92.59 106.77,-495.61 71.91,-657.47"/>
|
259
|
+
<polygon fill="black" stroke="black" points="68.47,-656.8 69.81,-667.32 75.32,-658.27 68.47,-656.8"/>
|
260
|
+
<text text-anchor="middle" x="1351.59" y="-96.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">LF / discard</text>
|
261
|
+
</g>
|
262
|
+
<!-- 12->1 -->
|
263
|
+
<g id="edge27" class="edge">
|
264
|
+
<title>12->1</title>
|
265
|
+
<path fill="none" stroke="black" d="M2224.19,-57.48C2161.79,-55.42 2060.17,-52.59 1972.46,-52.59 327.63,-52.59 327.63,-52.59 327.63,-52.59 227.11,-52.59 186.58,-82.62 132.89,-167.59 82.43,-247.45 65.7,-525.56 60.55,-655.6"/>
|
266
|
+
<polygon fill="black" stroke="black" points="57.04,-655.7 60.16,-665.83 64.04,-655.97 57.04,-655.7"/>
|
267
|
+
<text text-anchor="middle" x="1351.59" y="-56.79" font-family="Helvetica,Arial,sans-serif" font-size="14.00">* / ε</text>
|
268
|
+
</g>
|
269
|
+
</g>
|
270
|
+
</svg>
|
data/diagram/diagram.dot
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
digraph csv {
|
2
|
+
rankdir=LR;
|
3
|
+
node [ shape = point ];
|
4
|
+
ENTRY;
|
5
|
+
en_4;
|
6
|
+
eof_5;
|
7
|
+
node [ shape = circle, height = 0.2 ];
|
8
|
+
node [ fixedsize = true, height = 0.65, shape = doublecircle ];
|
9
|
+
4;
|
10
|
+
5;
|
11
|
+
node [ shape = circle ];
|
12
|
+
1 -> 2 [ label = "'?'" ];
|
13
|
+
2 -> 4 [ label = "'\\n' / last4, initts" ];
|
14
|
+
3 -> 3 [ label = "-128..-1, 1..'!', '#'..127" ];
|
15
|
+
3 -> 4 [ label = "'\"' / last2, initts" ];
|
16
|
+
4 -> 5 [ label = "-128..-1, 1..'\\t', '\\v'..'\\f', 14..'!', '#'..'+', '-'..127 / ts" ];
|
17
|
+
4 -> 4 [ label = "0 / ts, last5, initts" ];
|
18
|
+
4 -> 1 [ label = "'\\r' / ts" ];
|
19
|
+
4 -> 3 [ label = "'\"' / ts" ];
|
20
|
+
4 -> 4 [ label = "',' / ts, last3, initts" ];
|
21
|
+
5 -> 4 [ label = "0, '\\n', '\\r', '\"', ',' / next1, initts" ];
|
22
|
+
5 -> 5 [ label = "DEF" ];
|
23
|
+
ENTRY -> 4 [ label = "IN" ];
|
24
|
+
en_4 -> 4 [ label = "csv_scan" ];
|
25
|
+
5 -> eof_5 [ label = "EOF / next1" ];
|
26
|
+
}
|
data/diagram/diagram.rl
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
%%{
|
2
|
+
machine csv;
|
3
|
+
|
4
|
+
variable p s->p;
|
5
|
+
variable pe s->pe;
|
6
|
+
variable eof s->eof;
|
7
|
+
access s->;
|
8
|
+
|
9
|
+
eol = [\r\n];
|
10
|
+
comment = '#';
|
11
|
+
CR = "\r";
|
12
|
+
LF = "\n";
|
13
|
+
|
14
|
+
EOF = 0;
|
15
|
+
EOL = /\r?\n/;
|
16
|
+
comma = [,];
|
17
|
+
string = [^,"\r\n\0]*;
|
18
|
+
quote = '"' [^"\0]* '"';
|
19
|
+
|
20
|
+
csv_scan := |*
|
21
|
+
|
22
|
+
string => {
|
23
|
+
return_token(TK_String);
|
24
|
+
fbreak;
|
25
|
+
};
|
26
|
+
|
27
|
+
quote => {
|
28
|
+
return_token(TK_Quote);
|
29
|
+
s->data += 1;
|
30
|
+
fbreak;
|
31
|
+
};
|
32
|
+
|
33
|
+
comma => {
|
34
|
+
return_token(TK_Comma);
|
35
|
+
fbreak;
|
36
|
+
};
|
37
|
+
|
38
|
+
EOL => {
|
39
|
+
s->curline += 1;
|
40
|
+
return_token(TK_EOL);
|
41
|
+
fbreak;
|
42
|
+
};
|
43
|
+
|
44
|
+
EOF => {
|
45
|
+
return_token(TK_EOF);
|
46
|
+
fbreak;
|
47
|
+
};
|
48
|
+
|
49
|
+
*|;
|
50
|
+
}%%
|
data/lib/censive.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# censive - A quick and lightweight CSV handling library for Ruby
|
5
5
|
#
|
6
6
|
# Author: Steve Shreeve (steve.shreeve@gmail.com)
|
7
|
-
# Date: Feb
|
7
|
+
# Date: Feb 14, 2023
|
8
8
|
#
|
9
9
|
# https://crystal-lang.org/api/1.7.2/CSV.html (Crystal's CSV library)
|
10
10
|
# https://github.com/ruby/strscan/blob/master/ext/strscan/strscan.c
|
@@ -14,14 +14,22 @@
|
|
14
14
|
# GOALS:
|
15
15
|
# 1. Faster than Ruby's default CSV library
|
16
16
|
# 2. Lightweight code with streamlined and optimized logic
|
17
|
-
# 3. Support most non-compliant CSV variations (
|
17
|
+
# 3. Support most non-compliant CSV variations (@excel, @relax, etc)
|
18
|
+
# 4. Support most commonly used CSV options (@sep, @quote, @strip, @drop, etc)
|
18
19
|
#
|
19
|
-
# TODO:
|
20
|
+
# TODO:
|
21
|
+
# 1. Support IO streaming
|
22
|
+
# 2. Review all encodings, we may be losing speed when mixing encodings
|
23
|
+
# 3. Speedup possible if our @unquoted regex reads beyond @eol's
|
24
|
+
# 4. Will using String#freeze give us a speed up?
|
25
|
+
# 5. Implement support for scan_until(string) <= right now only regex is valid
|
20
26
|
# ============================================================================
|
21
27
|
|
22
28
|
require "strscan"
|
23
29
|
|
24
30
|
class Censive < StringScanner
|
31
|
+
attr :encoding, :out
|
32
|
+
|
25
33
|
def self.parse(...)
|
26
34
|
new(...).parse
|
27
35
|
end
|
@@ -34,78 +42,73 @@ class Censive < StringScanner
|
|
34
42
|
end
|
35
43
|
end
|
36
44
|
|
37
|
-
def initialize(str=
|
38
|
-
drop: false , # drop trailing empty
|
39
|
-
encoding:
|
45
|
+
def initialize(str=nil,
|
46
|
+
drop: false , # drop trailing empty columns?
|
47
|
+
encoding: nil , # character encoding
|
40
48
|
excel: false , # literals ="01" formulas =A1 + B2 http://bit.ly/3Y7jIvc
|
41
49
|
mode: :compact, # export mode: compact or full
|
42
|
-
out:
|
50
|
+
out: nil , # output stream, needs to respond to <<
|
43
51
|
quote: '"' , # quote character
|
44
52
|
relax: false , # relax quote parsing so ,"Fo"o, => ,"Fo""o",
|
45
53
|
rowsep: "\n" , # row separator for export
|
46
54
|
sep: "," , # column separator character
|
47
|
-
strip: false , # strip
|
48
|
-
**opts
|
55
|
+
strip: false , # strip columns when reading
|
56
|
+
**opts # grab bag
|
49
57
|
)
|
50
|
-
# data source
|
51
|
-
str
|
58
|
+
# initialize data source
|
59
|
+
if str && str.size < 100 && File.readable?(str)
|
60
|
+
str = File.open(str, encoding ? "r:#{encoding}" : "r").read
|
61
|
+
else
|
62
|
+
str ||= ""
|
63
|
+
str = str.encode(encoding) if encoding
|
64
|
+
end
|
52
65
|
super(str)
|
53
66
|
reset
|
54
67
|
|
55
|
-
# options
|
68
|
+
# config options
|
69
|
+
@cheat = true
|
56
70
|
@drop = drop
|
71
|
+
@encoding = str.encoding
|
57
72
|
@excel = excel
|
58
73
|
@mode = mode
|
59
|
-
@out = out
|
60
|
-
@quote = quote
|
74
|
+
@out = out || $stdout
|
61
75
|
@relax = relax
|
76
|
+
@strip = strip
|
77
|
+
|
78
|
+
# config strings
|
79
|
+
@quote = quote
|
62
80
|
@rowsep = rowsep
|
63
81
|
@sep = sep
|
64
|
-
@strip = strip
|
65
82
|
|
66
|
-
#
|
67
|
-
@cr
|
68
|
-
@lf
|
69
|
-
@es
|
70
|
-
@eq
|
71
|
-
|
72
|
-
|
73
|
-
@
|
83
|
+
# static strings
|
84
|
+
@cr = "\r"
|
85
|
+
@lf = "\n"
|
86
|
+
@es = ""
|
87
|
+
@eq = "="
|
88
|
+
|
89
|
+
# combinations
|
90
|
+
@esc = (@quote * 2)
|
91
|
+
@seq = [@sep, @eq].join # used for parsing in excel mode
|
92
|
+
|
93
|
+
# regexes
|
94
|
+
@eoc = /(?=#{"\\" + @sep}|#{@cr}|#{@lf}|\z)/o # end of cell
|
95
|
+
@eol = /#{@cr}#{@lf}?|#{@lf}/o # end of line
|
96
|
+
@escapes = /(#{@quote})|#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
97
|
+
@quotable = /#{"\\"+@sep}|#{@cr}|#{@lf}/o
|
98
|
+
@quotes = /#{@quote}/o
|
99
|
+
@seps = /#{@sep}+/o
|
100
|
+
@quoted = @excel ? /(?:=)?#{@quote}/o : @quote
|
101
|
+
@unquoted = /[^#{@sep}#{@cr}#{@lf}][^#{@quote}#{@cr}#{@lf}]*/o
|
102
|
+
@leadzero = /\A0\d*\z/
|
74
103
|
end
|
75
104
|
|
76
105
|
def reset(str=nil)
|
77
|
-
self.string = str if str
|
78
|
-
super()
|
79
106
|
@rows = nil
|
80
107
|
@cols = @cells = 0
|
81
|
-
end
|
82
|
-
|
83
|
-
# ==[ Lexer ]==
|
84
108
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
if scan(@quote) # consume quoted cell
|
89
|
-
token = ""
|
90
|
-
while true
|
91
|
-
token << (scan_until(/#{@quote}/o) or bomb "unclosed quote")[0..-2]
|
92
|
-
token << @quote and next if scan(@quote)
|
93
|
-
break if scan(@eoc)
|
94
|
-
@relax or bomb "invalid character after quote"
|
95
|
-
token << @quote + (scan_until(/#{@quote}/o) or bomb "bad inline quote")
|
96
|
-
end
|
97
|
-
elsif scan(@sep) then return @es
|
98
|
-
elsif scan(@eol) then return nil
|
99
|
-
else # consume unquoted cell
|
100
|
-
token = scan_until(@eoc) or bomb "unexpected character"
|
101
|
-
token.prepend(@eq) if excel
|
102
|
-
end
|
103
|
-
scan(@sep)
|
104
|
-
@strip ? token.strip : token
|
105
|
-
end
|
106
|
-
|
107
|
-
def bomb(msg)
|
108
|
-
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
109
|
+
self.string = str if str
|
110
|
+
@encoding = string.encoding
|
111
|
+
super()
|
109
112
|
end
|
110
113
|
|
111
114
|
# ==[ Parser ]==
|
@@ -122,18 +125,72 @@ class Censive < StringScanner
|
|
122
125
|
end
|
123
126
|
|
124
127
|
def next_row
|
128
|
+
if @cheat and line = scan_until(@eol)
|
129
|
+
row = line.chomp!.split(@sep, -1)
|
130
|
+
row.each do |col|
|
131
|
+
next if (saw = col.count(@quote)).zero?
|
132
|
+
next if (saw == 2) && col.delete_prefix!(@quote) && col.delete_suffix!(@quote)
|
133
|
+
@cheat = false
|
134
|
+
break
|
135
|
+
end if line.include?(@quote)
|
136
|
+
@cheat and return @strip ? row.each(&:strip!) : row
|
137
|
+
unscan
|
138
|
+
end
|
139
|
+
|
125
140
|
token = next_token or return
|
126
|
-
row = [
|
127
|
-
row
|
141
|
+
row = []
|
142
|
+
row.push(*token)
|
143
|
+
row.push(*token) while token = next_token
|
128
144
|
row
|
129
145
|
end
|
130
146
|
|
147
|
+
def next_token
|
148
|
+
if scan(@quoted) # quoted cell
|
149
|
+
token = ""
|
150
|
+
while true
|
151
|
+
token << (scan_until(@quotes) or bomb "unclosed quote")[0..-2]
|
152
|
+
token << @quote and next if scan(@quote)
|
153
|
+
scan(@eoc) and break
|
154
|
+
@relax or bomb "invalid character after quote"
|
155
|
+
token << @quote + (scan_until(@quotes) or bomb "bad inline quote")
|
156
|
+
end
|
157
|
+
scan(@sep)
|
158
|
+
@strip ? token.strip : token
|
159
|
+
elsif match = scan(@unquoted) # unquoted cell(s)
|
160
|
+
if check(@quote) && !match.chomp!(@sep) # if we see a stray quote
|
161
|
+
unless @excel && match.chomp!(@seq) # unless an excel literal, fix it
|
162
|
+
match << (scan_until(@eoc) or bomb "stray quote")
|
163
|
+
scan(@sep)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
tokens = match.split(@sep, -1)
|
167
|
+
@strip ? tokens.map!(&:strip) : tokens
|
168
|
+
elsif scan(@sep)
|
169
|
+
match = scan(@seps)
|
170
|
+
match ? match.split(@sep, -1) : @es
|
171
|
+
else
|
172
|
+
scan(@eol)
|
173
|
+
nil
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def each
|
178
|
+
@rows ||= parse
|
179
|
+
@rows.each {|row| yield row }
|
180
|
+
end
|
181
|
+
|
182
|
+
def export(**opts)
|
183
|
+
out = opts.empty? ? self : self.class.writer(**opts)
|
184
|
+
each {|row| out << row }
|
185
|
+
out.out
|
186
|
+
end
|
187
|
+
|
131
188
|
# ==[ Helpers ]==
|
132
189
|
|
133
190
|
# returns 2 (must be quoted and escaped), 1 (must be quoted), 0 (neither)
|
134
191
|
def grok(str)
|
135
|
-
if idx = str.index(
|
136
|
-
$1 ? 2 : str.index(
|
192
|
+
if idx = str.index(@escapes)
|
193
|
+
$1 ? 2 : str.index(@quotes, idx) ? 2 : 1
|
137
194
|
else
|
138
195
|
0
|
139
196
|
end
|
@@ -153,11 +210,11 @@ class Censive < StringScanner
|
|
153
210
|
row
|
154
211
|
when 1
|
155
212
|
row.map do |col|
|
156
|
-
col.match?(
|
213
|
+
col.match?(@quotable) ? "#{q}#{col}#{q}" : col
|
157
214
|
end
|
158
215
|
else
|
159
216
|
row.map do |col|
|
160
|
-
@excel && col =~
|
217
|
+
@excel && col =~ @leadzero ? "=#{q}#{col}#{q}" :
|
161
218
|
case grok(col)
|
162
219
|
when 0 then col
|
163
220
|
when 1 then "#{q}#{col}#{q}"
|
@@ -168,7 +225,7 @@ class Censive < StringScanner
|
|
168
225
|
when :full
|
169
226
|
if @excel
|
170
227
|
row.map do |col|
|
171
|
-
col =~
|
228
|
+
col =~ @leadzero ? "=#{q}#{col}#{q}" : "#{q}#{col.gsub(q, @esc)}#{q}"
|
172
229
|
end
|
173
230
|
else
|
174
231
|
row.map {|col| "#{q}#{col.gsub(q, @esc)}#{q}" }
|
@@ -178,16 +235,6 @@ class Censive < StringScanner
|
|
178
235
|
@out << out + @rowsep
|
179
236
|
end
|
180
237
|
|
181
|
-
def each
|
182
|
-
@rows ||= parse
|
183
|
-
@rows.each {|row| yield row }
|
184
|
-
end
|
185
|
-
|
186
|
-
def export(**opts)
|
187
|
-
out = opts.empty? ? self : self.class.writer(**opts)
|
188
|
-
each {|row| out << row }
|
189
|
-
end
|
190
|
-
|
191
238
|
def stats
|
192
239
|
wide = string.size.to_s.size
|
193
240
|
puts "%#{wide}d rows" % @rows.size
|
@@ -195,27 +242,38 @@ class Censive < StringScanner
|
|
195
242
|
puts "%#{wide}d cells" % @cells
|
196
243
|
puts "%#{wide}d bytes" % string.size
|
197
244
|
end
|
245
|
+
|
246
|
+
def bomb(msg)
|
247
|
+
abort "\n#{File.basename($0)}: #{msg} at character #{pos} near '#{string[pos-4,7]}'"
|
248
|
+
end
|
198
249
|
end
|
199
250
|
|
200
251
|
if __FILE__ == $0
|
201
|
-
|
202
|
-
#
|
203
|
-
|
204
|
-
|
252
|
+
str = DATA.gets("\n\n").chomp
|
253
|
+
# str = File.read(ARGV.first || "lc-2023.csv")
|
254
|
+
# str = File.open("KEN_ALL.CSV", "r:cp932").read
|
255
|
+
|
256
|
+
# require "stringio"
|
257
|
+
# csv = Censive.new(str, excel: true, relax: true)
|
258
|
+
# out = "" # StringIO.new
|
259
|
+
# csv.export(out: out) # (excel: true) # sep: "|")
|
260
|
+
# puts out # .string
|
261
|
+
|
262
|
+
puts Censive.new(str, excel: true, relax: true, out: "").export
|
205
263
|
end
|
206
264
|
|
207
265
|
__END__
|
208
|
-
|
209
|
-
|
266
|
+
"Don",="007",10,"Ed"
|
267
|
+
Name,Age,,,Shoe,,,
|
268
|
+
"Alice",27,5
|
210
269
|
Bob,33,10 1/2
|
211
270
|
Charlie or "Chuck",=B2 + B3,9
|
212
|
-
"Doug E Fresh",="007",10
|
213
271
|
Subtotal,=sum(B2:B5),="01234"
|
214
|
-
|
215
|
-
|
272
|
+
A,B,C,D
|
273
|
+
A,B,"C",D
|
274
|
+
A,B,C",D
|
275
|
+
A,B,"C",D
|
216
276
|
123,"CHO, JOELLE "JOJO"",456
|
217
277
|
123,"CHO, JOELLE ""JOJO""",456
|
218
|
-
|
219
|
-
# Excel mode checking
|
220
278
|
=,=x,x=,="x",="","","=",123,0123,="123",="0123"
|
221
|
-
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123"
|
279
|
+
,=x,x=,x,,,,,,=,,123,="0123",123,,="0123"
|
data/lib/test-censive.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "./censive"
|
4
|
+
require "digest/md5"
|
5
|
+
|
6
|
+
path = ARGV[0] || "KEN_ALL.CSV"
|
7
|
+
mode = path =~ /^ken/i ? "r:cp932" : "r"
|
8
|
+
|
9
|
+
data = File.open(path, mode).read
|
10
|
+
rows = Censive.parse(data)
|
11
|
+
|
12
|
+
puts "%s %s (%d size)" % [Digest::MD5.hexdigest(rows.join), path, File.stat(path).size], ""
|
data/lib/test-csv.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
require "digest/md5"
|
5
|
+
|
6
|
+
path = ARGV[0] || "KEN_ALL.CSV"
|
7
|
+
mode = path =~ /^ken/i ? "r:cp932" : "r"
|
8
|
+
|
9
|
+
data = File.open(path, mode).read
|
10
|
+
rows = CSV.parse(data)
|
11
|
+
|
12
|
+
puts "%s %s (%d size)" % [Digest::MD5.hexdigest(rows.join), path, File.stat(path).size], ""
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: censive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.21'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steve Shreeve
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A quick and lightweight CSV handling library for Ruby
|
14
14
|
email: steve.shreeve@gmail.com
|
@@ -19,7 +19,18 @@ files:
|
|
19
19
|
- LICENSE
|
20
20
|
- README.md
|
21
21
|
- censive.gemspec
|
22
|
+
- diagram/NFA to Regex.pdf
|
23
|
+
- diagram/censive@ce9d51d.png
|
24
|
+
- diagram/csv-ragel.dot
|
25
|
+
- diagram/csv.dot
|
26
|
+
- diagram/csv.png
|
27
|
+
- diagram/csv.rl
|
28
|
+
- diagram/csv.svg
|
29
|
+
- diagram/diagram.dot
|
30
|
+
- diagram/diagram.rl
|
22
31
|
- lib/censive.rb
|
32
|
+
- lib/test-censive.rb
|
33
|
+
- lib/test-csv.rb
|
23
34
|
- test/a-uses-tabs-and-single-quotes-and-no-trailing-newline.tsv
|
24
35
|
homepage: https://github.com/shreeve/censive
|
25
36
|
licenses:
|