rwv2 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rwv2 -- Microsoft Word Parser extension
4
+ # Copyright (C) 2003 Hannes Wyss, ywesee - intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected
21
+ # Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
22
+ # hwyss@ywesee.com
23
+ #
24
+ # Rwv2 -- Rwv2 -- 21.8.2003 -- hwyss@ywesee.com
25
+
26
+ module Rwv2 # Default handler classes: every callback defined below has an empty body (no-op).
27
+ class SubDocumentHandler # Callbacks named after body, footnote and header sub-document boundaries.
28
+ def body_start; end
29
+ def body_end; end
30
+ def footnote_start; end
31
+ def footnote_end; end
32
+ def headers_start; end
33
+ def headers_end; end
34
+ def header_start(header_data); end # the only callback in this class that takes an argument; it is ignored here
35
+ def header_end; end
36
+ end
37
+ class TableHandler # Row and cell boundary callbacks.
38
+ def row_start(table_properties); end # argument is accepted and ignored
39
+ def row_end; end
40
+ def cell_start; end
41
+ def cell_end; end
42
+ end
43
+ class TextHandler # Section, page-break, paragraph, text-run and picture callbacks.
44
+ def section_start(section_properties); end
45
+ def section_end; end
46
+ def page_break; end
47
+ def paragraph_start(paragraph_properties); end
48
+ def paragraph_end; end
49
+ def run_of_text(text, character_properties); end # two arguments: the text and its character properties
50
+ def picture(pict); end
51
+ end
52
+ end
data/lib/rwv2/rwv2.rb ADDED
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rwv2 -- Microsoft Word Parser extension
4
+ # Copyright (C) 2003 Hannes Wyss, ywesee - intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected
21
+ # Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
22
+ # hwyss@ywesee.com
23
+ ## rwv2 -- rwv2 -- 24.09.2003 -- rwaltert@ywesee.com
24
+
25
+ require 'rwv2'
26
+ require 'rwv2/handlers'
27
+
28
+ VERSION = '0.6.0' # version string; assigned at file top level, not inside a module
data/rwv2-0.2.3.patch ADDED
@@ -0,0 +1,223 @@
1
+ diff -Nur wv2-0.2.3/src/handlers.cpp wv2-0.2.3-p1/src/handlers.cpp
2
+ --- wv2-0.2.3/src/handlers.cpp 2006-06-12 18:40:11.000000000 +0200
3
+ +++ wv2-0.2.3-p1/src/handlers.cpp 2008-02-18 18:20:26.000000000 +0100
4
+ @@ -177,6 +177,11 @@
5
+ {
6
+ }
7
+
8
+ +void TextHandler::pictureData( SharedPtr<const Word97::PICF> /*picf*/,
9
+ + U8* /*buffer*/ )
10
+ +{
11
+ +}
12
+ +
13
+ void TextHandler::tableRowFound( const TableRowFunctor& tableRow, SharedPtr<const Word97::TAP> /*tap*/ )
14
+ {
15
+ tableRow();
16
+ diff -Nur wv2-0.2.3/src/handlers.h wv2-0.2.3-p1/src/handlers.h
17
+ --- wv2-0.2.3/src/handlers.h 2006-06-12 18:40:11.000000000 +0200
18
+ +++ wv2-0.2.3-p1/src/handlers.h 2008-02-18 18:20:30.000000000 +0100
19
+ @@ -251,7 +251,9 @@
20
+ * Very special characters (bad, bad name) are the ones which need additional
21
+ * information from the file (i.e. the plain "put the current date there" isn't sufficent).
22
+ */
23
+ - enum VerySpecialCharacter { Picture = 1, FootnoteAuto = 2, FieldBegin = 19, FieldSeparator = 20,
24
+ + enum VerySpecialCharacter { Picture = 1, FootnoteAuto = 2,
25
+ + Drawing = 8,
26
+ + FieldBegin = 19, FieldSeparator = 20,
27
+ FieldEnd = 21, FieldEscapeChar = 92 };
28
+
29
+ /**
30
+ @@ -299,6 +301,13 @@
31
+ virtual void fieldEnd( const FLD* fld, SharedPtr<const Word97::CHP> chp );
32
+
33
+ /**
34
+ + * This method is called every time we find a picture.
35
+ + * @param picf the picture-data.
36
+ + */
37
+ + virtual void pictureData( SharedPtr<const Word97::PICF> picf,
38
+ + U8* buffer);
39
+ +
40
+ + /**
41
+ * This method is called every time we find a table row. The default
42
+ * implementation invokes the functor, which triggers the parsing
43
+ * process for the given table row.
44
+ diff -Nur wv2-0.2.3/src/parser9x.cpp wv2-0.2.3-p1/src/parser9x.cpp
45
+ --- wv2-0.2.3/src/parser9x.cpp 2006-06-12 18:40:11.000000000 +0200
46
+ +++ wv2-0.2.3-p1/src/parser9x.cpp 2008-02-20 17:32:03.000000000 +0100
47
+ @@ -711,6 +711,7 @@
48
+ break;
49
+
50
+ // It has to be one of the very special characters...
51
+ + case TextHandler::Drawing:
52
+ case TextHandler::Picture:
53
+ emitPictureData( chp );
54
+ break;
55
+ @@ -812,7 +813,6 @@
56
+ picf = new Word97::PICF( Word95::toWord97( Word95::PICF( stream, false ) ) );
57
+ else
58
+ picf = new Word97::PICF( stream, false );
59
+ - stream->pop();
60
+
61
+ if ( picf->cbHeader < 58 ) {
62
+ wvlog << "Error: Found an image with a PICF smaller than 58 bytes! Skipping the image." << std::endl;
63
+ @@ -838,8 +838,17 @@
64
+ << std::endl << " dxaOrigin=" << picf->dxaOrigin << " dyaOrigin="
65
+ << picf->dyaOrigin << std::endl;
66
+ #endif
67
+ - // for now
68
+ - delete picf;
69
+ +
70
+ + /* extract the image blob */
71
+ + stream->seek( chp->fcPic_fcObj_lTagObj + picf->cbHeader, G_SEEK_SET );
72
+ + U32 len = picf->lcb - picf->cbHeader;
73
+ + U8* buffer;
74
+ + buffer = (U8 *)malloc(len * sizeof(U8));
75
+ + stream->read(buffer, len);
76
+ + m_textHandler->pictureData( picf, buffer );
77
+ + free(buffer);
78
+ +
79
+ + stream->pop();
80
+ }
81
+
82
+ void Parser9x::parseHeader( const HeaderData& data, unsigned char mask )
83
+ diff -Nur wv2-0.2.3/src/styles.cpp wv2-0.2.3-p1/src/styles.cpp
84
+ --- wv2-0.2.3/src/styles.cpp 2006-06-12 18:40:11.000000000 +0200
85
+ +++ wv2-0.2.3-p1/src/styles.cpp 2008-02-21 11:24:07.000000000 +0100
86
+ @@ -445,6 +445,9 @@
87
+ parentStyle = stylesheet.styleByIndex( m_std->istdBase );
88
+ if ( parentStyle ) {
89
+ const_cast<Style*>( parentStyle )->unwrapStyle( stylesheet, version );
90
+ + // I'm getting Segfaults where there is no m_upechpx in parentStyle
91
+ + if ( !parentStyle->m_upechpx )
92
+ + parentStyle->m_upechpx = new UPECHPX();
93
+ bool ok;
94
+ m_upechpx->istd = stylesheet.indexByID( m_std->sti, ok );
95
+ mergeUpechpx( parentStyle, version );
96
+ @@ -665,7 +668,7 @@
97
+ else if ( cbStshi == Word97::STSHI::sizeOf )
98
+ m_stsh.read( tableStream, false );
99
+ else {
100
+ - wvlog << "Detected a different STSHI, check this (trying to read Word97 one)" << std::endl;
101
+ + wvlog << "Detected a different STSHI, check this (trying to read Word97 one - probably Latent Style Data added in Word2003)" << std::endl;
102
+ m_stsh.read( tableStream, false );
103
+ }
104
+
105
+ diff -Nur wv2-0.2.3/src/word97_generated.h wv2-0.2.3-p1/src/word97_generated.h
106
+ --- wv2-0.2.3/src/word97_generated.h 2006-06-12 18:40:12.000000000 +0200
107
+ +++ wv2-0.2.3-p1/src/word97_generated.h 2008-02-21 11:03:07.000000000 +0100
108
+ @@ -8623,6 +8623,18 @@
109
+ */
110
+ U16 rgftcStandardChpStsh[3];
111
+
112
+ +
113
+ + /** introduced in Word2003 **/
114
+ + /**
115
+ + * size of each lsd in mpstilsd. The count of lsd's is stiMaxWhenSaved
116
+ + */
117
+ + //U16 cbLSD;
118
+ +
119
+ + /**
120
+ + * latent style data (stiMax == stiMaxWhenSaved upon save!)
121
+ + */
122
+ + //LSD mpstilsd[3];
123
+ +
124
+ }; // STSHI
125
+
126
+ bool operator==(const STSHI &lhs, const STSHI &rhs);
127
+ diff -Nur wv2-0.2.3/src/word97_helper.cpp wv2-0.2.3-p1/src/word97_helper.cpp
128
+ --- wv2-0.2.3/src/word97_helper.cpp 2006-06-12 18:50:45.000000000 +0200
129
+ +++ wv2-0.2.3-p1/src/word97_helper.cpp 2008-02-21 11:33:21.000000000 +0100
130
+ @@ -1137,8 +1137,14 @@
131
+ fBold = *ptr == 1;
132
+ else if ( *ptr == 128 && paragraphStyle )
133
+ fBold = paragraphStyle->chp().fBold;
134
+ - else if ( *ptr == 129 && paragraphStyle )
135
+ - fBold = !( paragraphStyle->chp().fBold );
136
+ + else if ( *ptr == 129 )
137
+ + /**
138
+ + * there are some Word-Documents where the Nil Style seems to be
139
+ + * defined as Reversed. Obviously the Nil Style has no
140
+ + * ParentStyle, which is why I've moved the paragraphStyle-check
141
+ + * from the else if clause down to the next line.
142
+ + */
143
+ + fBold = !( paragraphStyle && paragraphStyle->chp().fBold );
144
+ else
145
+ wvlog << "Warning: sprmCFBold couldn't find a style" << std::endl;
146
+ break;
147
+ @@ -1147,8 +1153,8 @@
148
+ fItalic = *ptr == 1;
149
+ else if ( *ptr == 128 && paragraphStyle )
150
+ fItalic = paragraphStyle->chp().fItalic;
151
+ - else if ( *ptr == 129 && paragraphStyle )
152
+ - fItalic = !( paragraphStyle->chp().fItalic );
153
+ + else if ( *ptr == 129 )
154
+ + fItalic = !( paragraphStyle && paragraphStyle->chp().fItalic );
155
+ else
156
+ wvlog << "Warning: sprmCFItalic couldn't find a style" << std::endl;
157
+ break;
158
+ @@ -1157,8 +1163,8 @@
159
+ fStrike = *ptr == 1;
160
+ else if ( *ptr == 128 && paragraphStyle )
161
+ fStrike = paragraphStyle->chp().fStrike;
162
+ - else if ( *ptr == 129 && paragraphStyle )
163
+ - fStrike = !( paragraphStyle->chp().fStrike );
164
+ + else if ( *ptr == 129 )
165
+ + fStrike = !( paragraphStyle && paragraphStyle->chp().fStrike );
166
+ else
167
+ wvlog << "Warning: sprmCFStrike couldn't find a style" << std::endl;
168
+ break;
169
+ @@ -1167,8 +1173,8 @@
170
+ fOutline = *ptr == 1;
171
+ else if ( *ptr == 128 && paragraphStyle )
172
+ fOutline = paragraphStyle->chp().fOutline;
173
+ - else if ( *ptr == 129 && paragraphStyle )
174
+ - fOutline = !( paragraphStyle->chp().fOutline );
175
+ + else if ( *ptr == 129 )
176
+ + fOutline = !( paragraphStyle && paragraphStyle->chp().fOutline );
177
+ else
178
+ wvlog << "Warning: sprmCFOutline couldn't find a style" << std::endl;
179
+ break;
180
+ @@ -1177,8 +1183,8 @@
181
+ fShadow = *ptr == 1;
182
+ else if ( *ptr == 128 && paragraphStyle )
183
+ fShadow = paragraphStyle->chp().fShadow;
184
+ - else if ( *ptr == 129 && paragraphStyle )
185
+ - fShadow = !( paragraphStyle->chp().fShadow );
186
+ + else if ( *ptr == 129 )
187
+ + fShadow = !( paragraphStyle && paragraphStyle->chp().fShadow );
188
+ else
189
+ wvlog << "Warning: sprmCFShadow couldn't find a style" << std::endl;
190
+ break;
191
+ @@ -1187,8 +1193,8 @@
192
+ fSmallCaps = *ptr == 1;
193
+ else if ( *ptr == 128 && paragraphStyle )
194
+ fSmallCaps = paragraphStyle->chp().fSmallCaps;
195
+ - else if ( *ptr == 129 && paragraphStyle )
196
+ - fSmallCaps = !( paragraphStyle->chp().fSmallCaps );
197
+ + else if ( *ptr == 129 )
198
+ + fSmallCaps = !( paragraphStyle && paragraphStyle->chp().fSmallCaps );
199
+ else
200
+ wvlog << "Warning: sprmCFSmallCaps couldn't find a style" << std::endl;
201
+ break;
202
+ @@ -1197,8 +1203,8 @@
203
+ fCaps = *ptr == 1;
204
+ else if ( *ptr == 128 && paragraphStyle )
205
+ fCaps = paragraphStyle->chp().fCaps;
206
+ - else if ( *ptr == 129 && paragraphStyle )
207
+ - fCaps = !( paragraphStyle->chp().fCaps );
208
+ + else if ( *ptr == 129 )
209
+ + fCaps = !( paragraphStyle && paragraphStyle->chp().fCaps );
210
+ else
211
+ wvlog << "Warning: sprmCFCaps couldn't find a style" << std::endl;
212
+ break;
213
+ @@ -1207,8 +1213,8 @@
214
+ fVanish = *ptr == 1;
215
+ else if ( *ptr == 128 && paragraphStyle )
216
+ fVanish = paragraphStyle->chp().fVanish;
217
+ - else if ( *ptr == 129 && paragraphStyle )
218
+ - fVanish = !( paragraphStyle->chp().fVanish );
219
+ + else if ( *ptr == 129 )
220
+ + fVanish = !( paragraphStyle && paragraphStyle->chp().fVanish );
221
+ else
222
+ wvlog << "Warning: sprmCFVanish couldn't find a style" << std::endl;
223
+ break;
@@ -0,0 +1,16 @@
1
+ {\rtf1\ansi\deff0\adeflang1025
2
+ {\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\froman\fprq2\fcharset0 Times New Roman;}{\f2\fnil\fprq2\fcharset0 Arial;}{\f3\fnil\fprq0\fcharset0 Times New Roman;}{\f4\fnil\fprq2\fcharset0 HG Mincho Light J{\*\falt msmincho};}{\f5\fnil\fprq2\fcharset0 Lucidasans;}{\f6\fnil\fprq0\fcharset0 Lucidasans;}}
3
+ {\colortbl;\red0\green0\blue0;\red128\green128\blue128;}
4
+ {\stylesheet{\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af5\afs24\lang255\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\loch\f0\fs24\lang2057\snext1 Normal;}
5
+ {\s2\sb240\sa120\keepn\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\afs28\lang255\ltrch\dbch\af4\langfe255\hich\f3\fs28\lang2057\loch\f3\fs28\lang2057\sbasedon1\snext3 Heading;}
6
+ {\s3\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af5\afs24\lang255\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\loch\f0\fs24\lang2057\sbasedon1\snext3 Body Text;}
7
+ {\s4\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs24\lang255\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\loch\f0\fs24\lang2057\sbasedon3\snext4 List;}
8
+ {\s5\sb120\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs24\lang255\ai\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\i\loch\f0\fs24\lang2057\i\sbasedon1\snext5 caption;}
9
+ {\s6\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs24\lang255\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\loch\f0\fs24\lang2057\sbasedon1\snext6 Index;}
10
+ }
11
+ {\info{\author Hannes Wyss}{\creatim\yr2008\mo2\dy19\hr11\min34}{\revtim\yr0\mo0\dy0\hr0\min0}{\printim\yr0\mo0\dy0\hr0\min0}{\comment StarWriter}{\vern6800}}\deftab709
12
+ {\*\pgdsctbl
13
+ {\pgdsc0\pgdscuse195\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\pgdscnxt0 Standard;}}
14
+ \paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc
15
+ \pard\plain \ltrpar\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af5\afs24\lang255\ltrch\dbch\af2\langfe255\hich\f0\fs24\lang2057\loch\f0\fs24\lang2057 {\rtlch \ltrch\loch\f0\fs24\lang2057\i0\b0 An RTF-File}
16
+ \par }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,644 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rwv2 -- Microsoft Word Parser extension
4
+ # Copyright (C) 2003 Hannes Wyss, ywesee - intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected
21
+ # Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
22
+ # hwyss@ywesee.com
23
+ #
24
+ # TestParser -- Rwv2 -- 21.8.2003 -- hwyss@ywesee.com
25
+
26
+ $: << File.expand_path('../ext/rwv2', File.dirname(__FILE__))
27
+ $: << File.expand_path('../lib', File.dirname(__FILE__))
28
+
29
+ require 'test/unit'
30
+ require 'rwv2/rwv2'
31
+ require 'RMagick'
32
+ require 'iconv'
33
+
34
+ class StubInlineReplacementHandler # Test stub: each callback returns a fixed one-character replacement string.
35
+ attr_accessor :non_required_hyphen # settable so individual tests can supply their own replacement value
36
+ def column_break
37
+ "c"
38
+ end
39
+ def hard_line_break
40
+ "\n"
41
+ end
42
+ def non_breaking_hyphen
43
+ "="
44
+ end
45
+ def non_breaking_space
46
+ "_"
47
+ end
48
+ def tab
49
+ "t"
50
+ end
51
+ end
52
+ class StubIncompleteReplacementHandler # Test stub that implements only one of the replacement callbacks.
53
+ def hard_line_break # returns a newline as the replacement
54
+ "\n"
55
+ end
56
+ end
57
+ ["body", "footnote", "headers", "header"].each { |tpe| # for each name, eval a class body adding <name>_start/<name>_end counting methods plus readers
58
+ eval <<-EOF
59
+ class StubSubDocumentHandler
60
+ attr_reader :#{tpe}_starts, :#{tpe}_ends
61
+ def #{tpe}_start
62
+ @#{tpe}_starts ||= 0
63
+ @#{tpe}_starts += 1
64
+ end
65
+ def #{tpe}_end
66
+ @#{tpe}_ends ||= 0
67
+ @#{tpe}_ends += 1
68
+ end
69
+ end
70
+ EOF
71
+ } # the generated zero-argument header_start is redefined by the class body that follows
72
+ class StubSubDocumentHandler # Reopens the class to replace the generated header_start counter with a recording version.
73
+ attr_reader :header_starts
74
+ def initialize
75
+ @header_starts = [] # an array here, unlike the integer counters the generated methods use
76
+ end
77
+ def header_start(header_type) # appends each received header_type in call order
78
+ @header_starts << header_type
79
+ end
80
+ end
81
+ class StubTextHandler # Test stub that records every text-handler callback for later assertions.
82
+ attr_accessor :texts, :formats, :section_properties, :section_ends
83
+ attr_accessor :page_breaks, :paragraph_properties, :paragraph_ends, :pictures
84
+ def initialize # arrays collect callback arguments, integers count argument-less callbacks
85
+ @iconv = Iconv.new('utf8', 'utf-16') # Iconv.new(to, from): converts UTF-16 input to UTF-8
86
+ @pictures = []
87
+ @formats = []
88
+ @texts = []
89
+ @section_properties = []
90
+ @paragraph_properties = []
91
+ @section_ends = 0
92
+ @paragraph_ends = 0
93
+ @page_breaks = 0
94
+ end
95
+ def picture(picture) # stores the received picture object
96
+ @pictures.push picture
97
+ end
98
+ def section_start(sep) # stores the section properties object
99
+ @section_properties << sep
100
+ end
101
+ def section_end
102
+ @section_ends += 1
103
+ end
104
+ def page_break
105
+ @page_breaks += 1
106
+ end
107
+ def paragraph_start(pap) # stores the paragraph properties object
108
+ @paragraph_properties << pap
109
+ end
110
+ def paragraph_end
111
+ @paragraph_ends += 1
112
+ end
113
+ def run_of_text(text, format=nil) # format is optional; a nil format is not recorded
114
+ @formats << format unless format.nil?
115
+ @texts << @iconv.iconv(text) # text is stored after conversion through @iconv
116
+ end
117
+ end
118
+ class StubTableHandler # Test stub that records row properties and counts row/cell callbacks.
119
+ attr_reader :row_starts, :row_ends, :cell_starts, :cell_ends
120
+ def initialize
121
+ @row_starts = [] # holds the properties argument of every row_start call
122
+ @row_ends = 0
123
+ @cell_starts = 0
124
+ @cell_ends = 0
125
+ end
126
+ def row_start(properties=nil) # properties is optional; nil is stored when it is omitted
127
+ @row_starts << properties
128
+ end
129
+ def row_end
130
+ @row_ends += 1
131
+ end
132
+ def cell_start
133
+ @cell_starts += 1
134
+ end
135
+ def cell_end
136
+ @cell_ends += 1
137
+ end
138
+ end
139
+
140
+ class TestRwv2Parser < Test::Unit::TestCase
141
+ def setup # resolves fixture paths relative to this file and prepares a shared replacement handler
141
+ @filename = File.expand_path('data/test.doc', File.dirname(__FILE__))
142
+ @filename2 = File.expand_path('data/test2.doc', File.dirname(__FILE__))
143
+ @filename3 = File.expand_path('data/test3.doc', File.dirname(__FILE__))
144
+ @filename4 = File.expand_path('data/test4.doc', File.dirname(__FILE__))
145
+ @filename5 = File.expand_path('data/test5.doc', File.dirname(__FILE__))
146
+ @filename6 = File.expand_path('data/test6.doc', File.dirname(__FILE__))
147
+ @filename7 = File.expand_path('data/test7.doc', File.dirname(__FILE__))
148
+ @filename8 = File.expand_path('data/test8.doc', File.dirname(__FILE__))
149
+ @filename9 = File.expand_path('data/test9.doc', File.dirname(__FILE__))
150
+ @unavailable = File.expand_path('data/unavailable.doc', File.dirname(__FILE__)) # not referenced by the tests visible in this part of the file
151
+ @rtf = File.expand_path('data/not_a_word_document.rtf', File.dirname(__FILE__)) # not referenced by the tests visible in this part of the file
152
+ @ir_handler = StubInlineReplacementHandler.new
153
+ @ir_handler.non_required_hyphen = "-" # default single-character replacement; individual tests may overwrite it
154
+ end
156
+ def test_create_parser # a parser can be created from a file path and from file content without raising
157
+ assert_nothing_raised {
158
+ Rwv2.create_parser(@filename)
159
+ }
160
+ assert_nothing_raised {
161
+ Rwv2.create_parser_from_content(File.read(@filename)) # same fixture, passed as a string
162
+ }
163
+ end
164
+ def test_inline_replacement_handler # special characters in the text runs are replaced by the stub handler's return values
165
+ parser = Rwv2.create_parser(@filename)
166
+ handler = StubTextHandler.new
167
+ parser.set_text_handler(handler)
168
+ assert_nothing_raised {
169
+ parser.set_inline_replacement_handler(@ir_handler)
170
+ }
171
+ parser.parse
172
+ expected = [
173
+ "Paragraph 1, Standard",
174
+ "Paragraph 2, Bold",
175
+ "Paragraph 3, Italic",
176
+ "Paragraph 4, Underlined",
177
+ "Paragraph 5, Bold Italic",
178
+ "Paragraph 6, Bold Underlined",
179
+ "Paragraph 7, Italic Underlined",
180
+ "Paragraph 8, Bold Italic Underlined",
181
+ "Paragraph 9, ",
182
+ "mixed Formats",
183
+ "TabtTab", # stub tab returns "t"
184
+ "HardLineBreak\nHardLineBreak", # stub hard_line_break returns "\n"
185
+ "ColumnBreakcColumnBreak", # stub column_break returns "c"
186
+ "NonBreakingHyphen=NonBreakingHyphen", # stub non_breaking_hyphen returns "="
187
+ "NonRequiredHyphen-NonRequiredHyphen", # "-" is assigned to non_required_hyphen in setup
188
+ "NonBreakingSpace",
189
+ "_", # stub non_breaking_space returns "_"; it is expected as a text run of its own
190
+ "NonBreakingSpace",
191
+ ]
192
+ assert_equal(expected, handler.texts)
193
+ end
194
+ def test_incomplete_replacement_handler # a handler that implements only hard_line_break is accepted; other characters stay unreplaced
195
+ parser = Rwv2.create_parser(@filename)
196
+ handler = StubTextHandler.new
197
+ parser.set_text_handler(handler)
198
+ replacer = StubIncompleteReplacementHandler.new
199
+ assert_nothing_raised {
200
+ parser.set_inline_replacement_handler(replacer)
201
+ }
202
+ parser.parse
203
+ expected = [
204
+ "Paragraph 1, Standard",
205
+ "Paragraph 2, Bold",
206
+ "Paragraph 3, Italic",
207
+ "Paragraph 4, Underlined",
208
+ "Paragraph 5, Bold Italic",
209
+ "Paragraph 6, Bold Underlined",
210
+ "Paragraph 7, Italic Underlined",
211
+ "Paragraph 8, Bold Italic Underlined",
212
+ "Paragraph 9, ",
213
+ "mixed Formats",
214
+ "Tab\tTab",
215
+ "HardLineBreak\nHardLineBreak", # the one replacement this handler provides
216
+ "ColumnBreak\016ColumnBreak",
217
+ "NonBreakingHyphen\036NonBreakingHyphen",
218
+ "NonRequiredHyphen\037NonRequiredHyphen",
219
+ "NonBreakingSpace",
220
+ "\302\240", # bytes 0xC2 0xA0: UTF-8 encoding of U+00A0
221
+ "NonBreakingSpace"
222
+ ]
223
+ assert_equal(expected, handler.texts)
224
+ end
225
+ def test_illegal_replacement_handler # parse must raise RuntimeError for an empty and for a two-character replacement
226
+ parser = Rwv2.create_parser(@filename)
227
+ handler = StubTextHandler.new
228
+ parser.set_text_handler(handler)
229
+ parser.set_inline_replacement_handler(@ir_handler)
230
+ @ir_handler.non_required_hyphen = "" # changed after the handler was registered with the parser
231
+ assert_raises(RuntimeError) {
232
+ parser.parse
233
+ }
234
+ @ir_handler.non_required_hyphen = "--"
235
+ assert_raises(RuntimeError) {
236
+ parser.parse # second parse call on the same parser object
237
+ }
238
+ end
239
+ def test_subdocument_handler # checks the sub-document callbacks recorded while parsing the test4.doc fixture
240
+ parser = Rwv2.create_parser(@filename4)
241
+ handler = StubSubDocumentHandler.new
242
+ assert_nothing_raised {
243
+ parser.set_subdocument_handler(handler)
244
+ }
245
+ parser.parse
246
+ assert_equal(1, handler.body_starts)
247
+ assert_equal(1, handler.body_ends)
248
+ assert_equal(1, handler.footnote_starts)
249
+ assert_equal(1, handler.footnote_ends)
250
+ assert_equal(1, handler.headers_starts)
251
+ assert_equal(1, handler.headers_ends)
252
+ assert_equal(2, handler.header_ends) # matches the two entries expected in header_starts on the next line
253
+ assert_equal([Rwv2::HEADER_ODD, Rwv2::FOOTER_ODD], handler.header_starts) # the stub records the header_type argument of each call
254
+ end
255
+ def test_table_handler # checks row/cell callback counts and the table properties passed to row_start for test5.doc
256
+ parser = Rwv2.create_parser(@filename5)
257
+ handler = StubTableHandler.new
258
+ assert_nothing_raised {
259
+ parser.set_table_handler(handler)
260
+ }
261
+ parser.parse
262
+ assert_equal(6, handler.row_ends)
263
+ assert_equal(11, handler.cell_starts)
264
+ assert_equal(11, handler.cell_ends)
265
+ head = handler.row_starts.at(0) # row_starts holds the properties object of each row, in call order
266
+ row0 = handler.row_starts.at(1)
267
+ row1 = handler.row_starts.at(2)
268
+ row2 = handler.row_starts.at(3)
269
+ row3 = handler.row_starts.at(4)
270
+ row4 = handler.row_starts.at(5)
271
+ assert_equal(283, row0.row_height)
272
+ assert_equal(-283, row1.row_height) # same magnitude as row0 with a negative sign; meaning of the sign is not shown here
273
+ assert_equal(2, head.row_cells)
274
+ assert_equal(2, row0.row_cells)
275
+ assert_equal(2, row1.row_cells)
276
+ assert_equal(1, row2.row_cells)
277
+ assert_equal(2, row3.row_cells)
278
+ assert_equal(2, row4.row_cells)
279
+ assert_equal(3, row0.cell_boundaries.size) # three boundaries for a row of two cells
280
+ assert_equal(0, row0.cell_boundaries.at(0))
281
+ assert_equal(4818, row0.cell_boundaries.at(1))
282
+ assert_equal(9639, row0.cell_boundaries.at(2))
283
+ assert_equal(2, row0.cell_descriptors.size)
284
+ ct0 = row0.cell_descriptors.first
285
+ cta = row0.cell_descriptors.last
286
+ ctb = row1.cell_descriptors.first
287
+ assert_instance_of(Rwv2::TableProperties::CellDescriptor, ct0)
288
+ assert_equal(false, ct0.first_merged?)
289
+ assert_equal(Rwv2::TableProperties::CellDescriptor::ALIGN_TOP, ct0.vertical_align)
290
+ assert_equal(Rwv2::TableProperties::CellDescriptor::ALIGN_CENTER, cta.vertical_align)
291
+ assert_equal(Rwv2::TableProperties::CellDescriptor::ALIGN_BOTTOM, ctb.vertical_align)
292
+ ct2 = row3.cell_descriptors.first
293
+ assert_equal(true, ct2.vertical_merged?)
294
+ assert_equal(true, ct2.vertical_restart?)
295
+ # ct2 = row3.cell_descriptors.first
296
+ # assert_equal(true, ct2.vertical)
297
+
298
+ # FIXME: the following are untested,
299
+ # need a _real_ Wordfile to test...
300
+ # assert_equal(Rwv2::TableProperties::ALIGN_LEFT, row0.align)
301
+ # assert_equal(Rwv2::TableProperties::ALIGN_LEFT, row1.align)
302
+ # row0.gap_half
303
+ # assert_equal(true, row0.cant_split)
304
+ # assert_equal(false, row1.cant_split)
305
+ # ct1 = row2.cell_descriptors.first
306
+ # assert_equal(true, ct1.merged)
307
+ # assert_equal(true, ct1.first_merged)
308
+ # :rotate_font, :backward, :vertical_merged,
309
+ # :vertical_restart, :vertical_align
310
+ end
311
+ def test_text_handler # checks the text runs and paragraph properties recorded for test.doc when no replacement handler is set
312
+ parser = Rwv2.create_parser(@filename)
313
+ handler = StubTextHandler.new
314
+ assert_nothing_raised {
315
+ parser.set_text_handler(handler)
316
+ }
317
+ parser.parse
318
+ expected = [
319
+ "Paragraph 1, Standard",
320
+ "Paragraph 2, Bold",
321
+ "Paragraph 3, Italic",
322
+ "Paragraph 4, Underlined",
323
+ "Paragraph 5, Bold Italic",
324
+ "Paragraph 6, Bold Underlined",
325
+ "Paragraph 7, Italic Underlined",
326
+ "Paragraph 8, Bold Italic Underlined",
327
+ "Paragraph 9, ",
328
+ "mixed Formats",
329
+ "Tab\tTab",
330
+ "HardLineBreak\vHardLineBreak", # unreplaced hard line break is expected as "\v"
331
+ "ColumnBreak\016ColumnBreak",
332
+ "NonBreakingHyphen\036NonBreakingHyphen",
333
+ "NonRequiredHyphen\037NonRequiredHyphen",
334
+ "NonBreakingSpace",
335
+ "\302\240", # bytes 0xC2 0xA0: UTF-8 encoding of U+00A0
336
+ "NonBreakingSpace"
337
+ ]
338
+ assert_equal(expected, handler.texts)
339
+ paps = handler.paragraph_properties
340
+ assert_equal(16, paps.size) # paragraph_start and paragraph_end are expected the same number of times
341
+ assert_equal(16, handler.paragraph_ends)
342
+ pap0 = paps.at(0)
343
+ pap1 = paps.at(1)
344
+ pap2 = paps.at(2)
345
+ pap3 = paps.at(3)
346
+ pap4 = paps.at(4)
347
+ pap5 = paps.at(5)
348
+ pap6 = paps.at(6)
349
+ assert_instance_of(Rwv2::ParagraphProperties, pap0)
350
+ assert_equal(Rwv2::ALIGN_LEFT, pap0.align) # the first four paragraphs each use a different alignment
351
+ assert_equal(Rwv2::ALIGN_CENTER, pap1.align)
352
+ assert_equal(Rwv2::ALIGN_RIGHT, pap2.align)
353
+ assert_equal(Rwv2::ALIGN_JUSTIFY, pap3.align)
354
+ assert_equal(true, pap0.keep?) # paragraphs 0-2 each have exactly one of the three flags set, paragraph 3 has none
355
+ assert_equal(false, pap0.keep_with_next?)
356
+ assert_equal(false, pap0.widow_control?)
357
+ assert_equal(false, pap1.keep?)
358
+ assert_equal(true, pap1.keep_with_next?)
359
+ assert_equal(false, pap1.widow_control?)
360
+ assert_equal(false, pap2.keep?)
361
+ assert_equal(false, pap2.keep_with_next?)
362
+ assert_equal(true, pap2.widow_control?)
363
+ assert_equal(false, pap3.keep?)
364
+ assert_equal(false, pap3.keep_with_next?)
365
+ assert_equal(false, pap3.widow_control?)
366
+ # FIXME does this work with a real Wordfile?
367
+ # assert_equal(false, pap0.page_break_before?)
368
+ # assert_equal(false, pap1.page_break_before?)
369
+ # assert_equal(false, pap2.page_break_before?)
370
+ # assert_equal(true, pap3.page_break_before?)
371
+ assert_equal(1680, pap4.indent_right) # paragraphs 4-6 cover right, left and negative first-line indents
372
+ assert_equal(0, pap4.indent_left)
373
+ assert_equal(0, pap4.indent_first_line)
374
+ assert_equal(0, pap5.indent_right)
375
+ assert_equal(570, pap5.indent_left)
376
+ assert_equal(0, pap5.indent_first_line)
377
+ assert_equal(0, pap6.indent_right)
378
+ assert_equal(570, pap6.indent_left)
379
+ assert_equal(-285, pap6.indent_first_line)
380
+ end
381
+ def test_incomplete_text_handler # parsing with a plain Rwv2::TextHandler instance must not raise
382
+ parser = Rwv2.create_parser(@filename)
383
+ handler = Rwv2::TextHandler.new
384
+ parser.set_text_handler(handler)
385
+ assert_nothing_raised { parser.parse }
386
+ end
387
+ def test_character_properties # checks which recorded formats of test.doc report bold? and italic?
388
+ parser = Rwv2.create_parser(@filename)
389
+ handler = StubTextHandler.new
390
+ parser.set_text_handler(handler)
391
+ parser.parse
392
+ formats = handler.formats
393
+ assert_equal(false, formats.empty?,
394
+ "The Parser recorded no Character Properties")
395
+ bold = handler.formats.collect { |fmt| fmt.bold? } # NOTE(review): 'bold' is never read in this method
396
+ expected = [1,4,5,7,9]
397
+ assert_equal(expected, index_select(formats, :bold?)) # index_select is defined outside this view; presumably returns the indices whose format answers true - confirm
398
+ expected = [2,4,6,7]
399
+ assert_equal(expected, index_select(formats, :italic?))
400
+ end
401
+ def test_character_properties2 # checks text runs and one character-property flag per run for test2.doc
402
+ parser = Rwv2.create_parser(@filename2)
403
+ handler = StubTextHandler.new
404
+ parser.set_text_handler(handler)
405
+ parser.parse
406
+ expected = [
407
+ "The ", "new Text!",
408
+ "This will be deleted.",
409
+ "Outlined",
410
+ "Small Caps",
411
+ "Caps",
412
+ "Strikethrough",
413
+ "Shadow",
414
+ "Lower Case",
415
+ "Embossed",
416
+ "Engraved",
417
+ "Double Strikethrough",
418
+ ]
419
+ assert_equal(expected, handler.texts)
420
+ formats = handler.formats
421
+ assert_equal(false, formats.empty?,
422
+ "The Parser recorded no Character Properties")
423
+ assert_equal([0,1], index_select(formats, :rev_mark?)) # index_select is defined outside this view; presumably returns the indices whose format answers true - confirm
424
+ assert_equal([2], index_select(formats, :rev_mark_del?))
425
+ assert_equal([3], index_select(formats, :outline?))
426
+ assert_equal([4], index_select(formats, :small_caps?))
427
+ assert_equal([5], index_select(formats, :caps?))
428
+ assert_equal([6], index_select(formats, :strikethrough?))
429
+ assert_equal([7], index_select(formats, :shadow?))
430
+ #assert_equal([8], index_select(formats, :lowercase?)) # FIXME
431
+ # our test-file is made with Openoffice - which does not set the
432
+ # lowercase-flag in Word Files...
433
+ assert_equal([9], index_select(formats, :emboss?))
434
+ assert_equal([10], index_select(formats, :imprint?)) # index 10 is the "Engraved" run in the expected texts
435
+ assert_equal([11], index_select(formats, :double_strikethrough?))
436
+ end
437
+ def test_character_properties3 # checks position, underline style, scale and font size of the runs in test3.doc
438
+ parser = Rwv2.create_parser(@filename3)
439
+ handler = StubTextHandler.new
440
+ parser.set_text_handler(handler)
441
+ parser.parse
442
+ expected = [
443
+ "Normal",
444
+ "Superscript",
445
+ "Subscript",
446
+ "Single",
447
+ "By Word",
448
+ "Double",
449
+ "Dotted",
450
+ "Thick",
451
+ "Dash",
452
+ "Dot Dash",
453
+ "Dot Dot Dash",
454
+ "Wave",
455
+ ]
456
+ assert_equal(expected, handler.texts)
457
+ formats = handler.formats
458
+ assert_equal(false, formats.empty?,
459
+ "The Parser recorded no Character Properties")
460
+ assert_equal(Rwv2::CharacterProperties::POSITION_NORMAL, formats[0].position)
461
+ assert_equal(Rwv2::CharacterProperties::POSITION_SUPERSCRIPT, formats[1].position)
462
+ assert_equal(Rwv2::CharacterProperties::POSITION_SUBSCRIPT, formats[2].position)
463
+ assert_equal(false, formats[0].underline) # NOTE(review): the same value is compared with UNDERLINE_NONE below; both pass only if UNDERLINE_NONE == false - confirm
464
+ #assert_equal([3,4,5,6,7,8,9,10,11], index_select(formats, :underline)) # FIXME
465
+ # Openoffice saves simple underline somewhere else?
466
+ assert_equal([4,5,6,7,8,9,10,11], index_select(formats, :underline)) # FIXME
467
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_NONE, formats[0].underline)
468
+ #assert_equal(Rwv2::CharacterProperties::UNDERLINE_SINGLE, formats[3].underline) # FIXME
469
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_BY_WORD, formats[4].underline)
470
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_DOUBLE, formats[5].underline)
471
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_DOTTED, formats[6].underline)
472
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_THICK, formats[7].underline)
473
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_DASH, formats[8].underline)
474
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_DOT_DASH, formats[9].underline)
475
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_DOT_DOT_DASH, formats[10].underline)
476
+ assert_equal(Rwv2::CharacterProperties::UNDERLINE_WAVE, formats[11].underline)
477
+ assert_equal(100, formats[0].scale)
478
+ assert_equal(24, formats[0].fontsize) # unit of fontsize is not shown in this file
479
+ end
480
# Parses @filename6 and checks how many section starts, section ends and
# page breaks the text handler saw, plus selected properties of the
# individual sections (page number format, flags, column counts).
def test_section_properties
  text_handler = StubTextHandler.new
  parser = Rwv2.create_parser(@filename6)
  parser.set_text_handler(text_handler)
  parser.parse

  sections = text_handler.section_properties
  assert_equal(5, sections.size)
  assert_equal(5, text_handler.section_ends)
  assert_equal(1, text_handler.page_breaks)

  sect0, sect1, sect2, sect3, sect4 = (0..4).map { |idx| sections.at(idx) }

  assert_instance_of(Rwv2::SectionProperties, sect0)
  assert_equal(Rwv2::SectionProperties::NUMBER_ARABIC, sect0.page_number_format)
  assert_equal(false, sect0.title_page?)
  assert_equal(false, sect0.unlocked?)
  assert_equal(false, sect0.page_number_restart?)
  assert_equal(false, sect0.line_numbering_modulus)

  # [section, expected number of columns]
  [
    [sect0, 1], [sect1, 2], [sect2, 3], [sect3, 2], [sect4, 1],
  ].each do |section, columns|
    assert_equal(columns, section.columns)
  end

  # FIXME: get a real wordfile...
  # assert_equal(Rwv2::SectionProperties::BREAK_NONE, sect0.break_code)
  # assert_equal(Rwv2::SectionProperties::BREAK_NONE, sect1.break_code)
  # assert_equal(Rwv2::SectionProperties::BREAK_NONE, sect2.break_code)
  # assert_equal(Rwv2::SectionProperties::BREAK_NONE, sect3.break_code)
  # assert_equal(Rwv2::SectionProperties::BREAK_NONE, sect4.break_code)
  # assert_equal(Rwv2::SectionProperties::LINE_NUMBERING_PER_PAGE, sect0.line_numbering_code)
  # assert_equal(true, sect0.endnote?)
  # assert_equal(true, sect1.endnote?)
  # assert_equal(false, sect2.endnote?)
  # assert_equal(true, sect3.endnote?)
end
518
# Installs freshly created handlers on a parser, forces a garbage
# collection run, waits briefly and then checks that parsing raises nothing.
# NOTE(review): presumably guards against handlers being collected while
# only the parser references them -- confirm against the extension code.
def test_gc
  parser = Rwv2.create_parser(@filename)
  # The handlers are intentionally not kept in local variables: the parser
  # is the only thing this method hands them to.
  parser.set_inline_replacement_handler(@ir_handler.dup)
  parser.set_subdocument_handler(StubSubDocumentHandler.new)
  parser.set_text_handler(StubTextHandler.new)
  GC.start
  sleep(0.5)
  assert_nothing_raised do
    parser.parse
  end
end
529
# Parses @filename7 and checks the tab descriptors attached to the first
# recorded paragraph: their count, class, positions and alignments.
def test_tab_descriptors
  parser = Rwv2.create_parser(@filename7)
  text_handler = StubTextHandler.new
  assert_nothing_raised do
    parser.set_text_handler(text_handler)
  end
  parser.parse

  first_pap = text_handler.paragraph_properties.at(0)
  assert_instance_of(Rwv2::ParagraphProperties, first_pap)

  stops = first_pap.tab_descriptors
  assert_equal(4, stops.size)
  first, second, third, fourth = stops
  assert_instance_of(Rwv2::TabDescriptor, first)

  # [tab descriptor, expected position]
  [
    [first, 1410], [second, 2835], [third, 4230], [fourth, 5655],
  ].each do |stop, position|
    assert_equal(position, stop.position)
  end

  # [expected alignment, tab descriptor]
  [
    [Rwv2::TabDescriptor::ALIGN_LEFT, first],
    [Rwv2::TabDescriptor::ALIGN_RIGHT, second],
    [Rwv2::TabDescriptor::ALIGN_CENTER, fourth],
  ].each do |alignment, stop|
    assert_equal(alignment, stop.align)
  end
  # FIXME
  # assert_equal(Rwv2::TabDescriptor::ALIGN_DECIMAL, third.align)
end
553
# Expects Rwv2.create_parser to raise Errno::ENOENT for the path held in
# @unavailable.
def test_unavailable
  assert_raises(Errno::ENOENT) do
    Rwv2.create_parser(@unavailable)
  end
end
558
# Expects Rwv2.create_parser to reject the file in @rtf with an
# ArgumentError whose message names the offending path.
def test_invalid__rtf
  # assert_raises hands back the exception it caught, so the message is
  # checked on that very failure. A separate begin/rescue around a second
  # call would pass silently if that call did not raise.
  err = assert_raises(ArgumentError) {
    Rwv2.create_parser(@rtf)
  }
  # Plain interpolation instead of sprintf: the path must not be treated as
  # a format string, where a '%' inside it would be misinterpreted.
  assert_equal("'#{@rtf}' is not a word-document.", err.message)
end
568
# Expects Rwv2.create_parser_from_content to reject the content of the file
# in @rtf with an ArgumentError carrying a generic "not a word-document"
# message.
def test_invalid__rtf__from_content
  # Read the fixture once, outside the block, so that only the parser call
  # itself is expected to raise.
  content = File.read(@rtf)
  # assert_raises hands back the exception it caught, so the message is
  # checked on that very failure. A separate begin/rescue around a second
  # call would pass silently if that call did not raise.
  err = assert_raises(ArgumentError) {
    Rwv2.create_parser_from_content(content)
  }
  assert_equal("Input is not a word-document.", err.message)
end
578
# Parses @filename8 and checks the two picture descriptors handed to the
# text handler: display size, scaling, cropping, type flags, and the pixel
# dimensions of the image decoded from each descriptor's blob.
def test_picture__word95
  ## later openoffice formats don't work yet.
  text_handler = StubTextHandler.new
  parser = Rwv2.create_parser(@filename8)
  assert_nothing_raised do
    parser.set_text_handler(text_handler)
  end
  assert_nothing_raised do
    parser.parse
  end
  assert_equal(2, text_handler.pictures.size)

  # Expected [display_width, display_height] per picture, in document order;
  # every other checked attribute is the same for both pictures.
  [[1146, 1147], [1145, 1146]].each_with_index do |(width, height), idx|
    descriptor = text_handler.pictures.at(idx)
    assert_instance_of(Rwv2::PictureDescriptor, descriptor)
    assert_equal(width, descriptor.display_width)
    assert_equal(height, descriptor.display_height)
    assert_equal(999, descriptor.scaling_horizontal)
    assert_equal(999, descriptor.scaling_vertical)
    assert_equal(0, descriptor.crop_left)
    assert_equal(0, descriptor.crop_top)
    assert_equal(0, descriptor.crop_right)
    assert_equal(0, descriptor.crop_bottom)
    assert_equal(false, descriptor.is_bitmap?)
    assert_equal(false, descriptor.is_active_ole_object?)

    # from_blob returns a list of images; only the first one is inspected.
    image = Magick::Image.from_blob(descriptor.blob).first
    assert_equal(38, image.rows)
    assert_equal(38, image.columns)
  end
end
624
# Parses @filename9 and checks that the single text run reaches the handler
# with the expected bytes for the ligature character.
def test_special_characters
  text_handler = StubTextHandler.new
  parser = Rwv2.create_parser(@filename9)
  assert_nothing_raised do
    parser.set_text_handler(text_handler)
  end
  parser.parse
  # The octal escapes spell out the two bytes expected after "Ligature: ".
  assert_equal(["Ligature: \305\223"], text_handler.texts)
end
636
+ # helper methods
637
# Returns the indices of all items in +collection+ for which sending
# +symbol+ to the item yields a truthy value, in iteration order.
def index_select(collection, symbol)
  matches = []
  collection.each_with_index do |element, position|
    next unless element.send(symbol)
    matches.push(position)
  end
  matches
end
644
+ end