text-hyphen 1.0.2 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.rdoc +54 -0
- data/License.rdoc +159 -0
- data/Manifest.txt +67 -5
- data/README.rdoc +69 -0
- data/Rakefile +8 -4
- data/bin/{hyphen → ruby-hyphen} +0 -0
- data/lib/text-hyphen.rb +1 -0
- data/lib/text/hyphen.rb +74 -111
- data/lib/text/hyphen/language.rb +90 -26
- data/lib/text/hyphen/language/1.8/ca.rb +171 -0
- data/lib/text/hyphen/language/1.8/cs.rb +360 -0
- data/lib/text/hyphen/language/1.8/da.rb +117 -0
- data/lib/text/hyphen/language/1.8/de1.rb +718 -0
- data/lib/text/hyphen/language/1.8/de2.rb +680 -0
- data/lib/text/hyphen/language/1.8/en_uk.rb +789 -0
- data/lib/text/hyphen/language/1.8/en_us.rb +490 -0
- data/lib/text/hyphen/language/1.8/es.rb +287 -0
- data/lib/text/hyphen/language/1.8/et.rb +335 -0
- data/lib/text/hyphen/language/1.8/eu.rb +112 -0
- data/lib/text/hyphen/language/1.8/fi.rb +112 -0
- data/lib/text/hyphen/language/1.8/fr.rb +389 -0
- data/lib/text/hyphen/language/1.8/ga.rb +606 -0
- data/lib/text/hyphen/language/1.8/hr.rb +122 -0
- data/lib/text/hyphen/language/1.8/hsb.rb +179 -0
- data/lib/text/hyphen/language/1.8/hu1.rb +380 -0
- data/lib/text/hyphen/language/1.8/hu2.rb +1278 -0
- data/lib/text/hyphen/language/1.8/ia.rb +71 -0
- data/lib/text/hyphen/language/1.8/id.rb +91 -0
- data/lib/text/hyphen/language/1.8/is.rb +387 -0
- data/lib/text/hyphen/language/1.8/it.rb +133 -0
- data/lib/text/hyphen/language/1.8/la.rb +132 -0
- data/lib/text/hyphen/language/1.8/mn.rb +101 -0
- data/lib/text/hyphen/language/1.8/nl.rb +1250 -0
- data/lib/text/hyphen/language/1.8/no1.rb +299 -0
- data/lib/text/hyphen/language/1.8/no2.rb +134 -0
- data/lib/text/hyphen/language/1.8/pl.rb +478 -0
- data/lib/text/hyphen/language/1.8/pt.rb +54 -0
- data/lib/text/hyphen/language/1.8/sv.rb +447 -0
- data/lib/text/hyphen/language/1.9/ca.rb +174 -0
- data/lib/text/hyphen/language/1.9/cs.rb +361 -0
- data/lib/text/hyphen/language/1.9/da.rb +117 -0
- data/lib/text/hyphen/language/1.9/de1.rb +719 -0
- data/lib/text/hyphen/language/1.9/de2.rb +682 -0
- data/lib/text/hyphen/language/1.9/en_uk.rb +791 -0
- data/lib/text/hyphen/language/1.9/en_us.rb +492 -0
- data/lib/text/hyphen/language/1.9/es.rb +289 -0
- data/lib/text/hyphen/language/1.9/et.rb +336 -0
- data/lib/text/hyphen/language/1.9/eu.rb +114 -0
- data/lib/text/hyphen/language/1.9/fi.rb +113 -0
- data/lib/text/hyphen/language/1.9/fr.rb +391 -0
- data/lib/text/hyphen/language/1.9/ga.rb +608 -0
- data/lib/text/hyphen/language/1.9/hr.rb +123 -0
- data/lib/text/hyphen/language/1.9/hsb.rb +180 -0
- data/lib/text/hyphen/language/1.9/hu1.rb +382 -0
- data/lib/text/hyphen/language/1.9/hu2.rb +1280 -0
- data/lib/text/hyphen/language/1.9/ia.rb +73 -0
- data/lib/text/hyphen/language/1.9/id.rb +93 -0
- data/lib/text/hyphen/language/1.9/is.rb +388 -0
- data/lib/text/hyphen/language/1.9/it.rb +134 -0
- data/lib/text/hyphen/language/1.9/la.rb +134 -0
- data/lib/text/hyphen/language/1.9/mn.rb +102 -0
- data/lib/text/hyphen/language/1.9/nl.rb +1252 -0
- data/lib/text/hyphen/language/1.9/no1.rb +301 -0
- data/lib/text/hyphen/language/1.9/no2.rb +136 -0
- data/lib/text/hyphen/language/1.9/pl.rb +479 -0
- data/lib/text/hyphen/language/1.9/pt.rb +55 -0
- data/lib/text/hyphen/language/1.9/sv.rb +449 -0
- data/lib/text/hyphen/language/ca.rb +3 -173
- data/lib/text/hyphen/language/cs.rb +3 -362
- data/lib/text/hyphen/language/da.rb +3 -117
- data/lib/text/hyphen/language/de.rb +1 -0
- data/lib/text/hyphen/language/de1.rb +3 -724
- data/lib/text/hyphen/language/de2.rb +3 -685
- data/lib/text/hyphen/language/en_uk.rb +3 -790
- data/lib/text/hyphen/language/en_us.rb +3 -492
- data/lib/text/hyphen/language/es.rb +3 -288
- data/lib/text/hyphen/language/et.rb +3 -336
- data/lib/text/hyphen/language/eu.rb +3 -114
- data/lib/text/hyphen/language/fi.rb +3 -112
- data/lib/text/hyphen/language/fr.rb +3 -391
- data/lib/text/hyphen/language/ga.rb +3 -607
- data/lib/text/hyphen/language/hr.rb +3 -123
- data/lib/text/hyphen/language/hsb.rb +2 -179
- data/lib/text/hyphen/language/hu.rb +1 -0
- data/lib/text/hyphen/language/hu1.rb +3 -384
- data/lib/text/hyphen/language/hu2.rb +3 -1282
- data/lib/text/hyphen/language/ia.rb +3 -72
- data/lib/text/hyphen/language/id.rb +3 -96
- data/lib/text/hyphen/language/is.rb +3 -389
- data/lib/text/hyphen/language/it.rb +3 -134
- data/lib/text/hyphen/language/la.rb +3 -133
- data/lib/text/hyphen/language/mn.rb +3 -102
- data/lib/text/hyphen/language/ms.rb +9 -0
- data/lib/text/hyphen/language/nl.rb +3 -1252
- data/lib/text/hyphen/language/no.rb +1 -0
- data/lib/text/hyphen/language/no1.rb +3 -302
- data/lib/text/hyphen/language/no2.rb +3 -137
- data/lib/text/hyphen/language/pl.rb +3 -479
- data/lib/text/hyphen/language/pt.rb +3 -55
- data/lib/text/hyphen/language/sv.rb +3 -448
- data/test/data/bug_9807_latin1.rb +10 -0
- data/test/data/bug_9807_utf-8.rb +10 -0
- data/test/test_bugs.rb +14 -4
- data/test/test_text_hyphen.rb +3 -3
- data/text-hyphen.gemspec +29 -29
- metadata +101 -40
- data/COPYING.txt +0 -339
- data/History.txt +0 -23
- data/LICENCE.txt +0 -47
- data/README.txt +0 -82
data/.gemtest
ADDED
File without changes
|
data/History.rdoc
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
== 1.2 / 2011.07.17
|
2
|
+
* Major Enhancements:
|
3
|
+
* This release supports Ruby 1.9 with UTF-8 encodings. The language files are
|
4
|
+
duplicated for both Ruby 1.8 and 1.9 and the correct version is loaded
|
5
|
+
based on RUBY\_VERSION.
|
6
|
+
* Minor Enhancements:
|
7
|
+
* Making Hungarian and Norwegian language files act like the German language
|
8
|
+
files (both of these languages have two alternative hyphenation tables).
|
9
|
+
* Added a Malay language file that should work correctly.
|
10
|
+
* Cleaned up the documentation.
|
11
|
+
* Bug Fixes:
|
12
|
+
* Fixed 9807 and 28128 (previously noted as not reproducible; quality
|
13
|
+
reproduction cases were found).
|
14
|
+
* Changes:
|
15
|
+
* Relicensing the core library to the MIT license and attempting to clarify
|
16
|
+
the license situation.
|
17
|
+
* Renaming hyphen to ruby-hyphen.
|
18
|
+
* This is the final release compatible with Ruby 1.8, first release
|
19
|
+
compatible with Ruby 1.9.
|
20
|
+
* Test Coverage:
|
21
|
+
* Tested with all major Ruby releases except IronRuby.
|
22
|
+
* jruby-1.6.3 (1.8.7): passes all tests.
|
23
|
+
* jruby-1.6.3 (1.9.2): fails one UTF-8 related test (see JRUBY-5927).
|
24
|
+
* macruby-0.10 (1.9.2): passes all tests. Segfault with "rake test" (see
|
25
|
+
https://www.macruby.org/trac/ticket/1362 for more information).
|
26
|
+
* maglev-ruby-0.9 (1.8.7): passes all tests.
|
27
|
+
* rubinius-1.2.5 (1.8.7): passes all tests.
|
28
|
+
* ree-1.8.7-2011.03 (1.8.7): passes all tests.
|
29
|
+
* MRI-1.8.7-p352 (1.8.7): passes all tests.
|
30
|
+
* MRI-1.9.2-p290 (1.9.2): passes all tests.
|
31
|
+
|
32
|
+
== 1.0.2 / 2011.02.09
|
33
|
+
* Moved to 'hoe' and GitHub.
|
34
|
+
* Preparing for 2.0 which will be Ruby 1.9-only for UTF-8.
|
35
|
+
* Fixing German support (RubyForge 28498):
|
36
|
+
* Choosing 'de' as a language will load 'de1'. Choosing 'de1' or 'de2' will
|
37
|
+
load properly now, but they will be reported with an ISO language code of
|
38
|
+
'de' (new optional #isocode attribute on a language definition that will
|
39
|
+
override the #iso_language setting of a Text::Hyphen instance if set).
|
40
|
+
* Both 'de1' and 'de2' can be loaded simultaneously now, but the first one
|
41
|
+
loaded will claim the Text::Hyphen::Language::DE constant.
|
42
|
+
* Added test cases for bugs:
|
43
|
+
* RubyForge 9807 (cannot reproduce)
|
44
|
+
* RubyForge 28128 (cannot reproduce)
|
45
|
+
* RubyForge 28498
|
46
|
+
|
47
|
+
== 1.0.1
|
48
|
+
* Minor modification to the RubyGem release of Text::Hyphen to enable the
|
49
|
+
hyphen command-line program.
|
50
|
+
|
51
|
+
== 1.0.0
|
52
|
+
* Initial version based on TeX::Hyphen 0.4.0 (some changes have been
|
53
|
+
backported to TeX::Hyphen 0.5.0).
|
54
|
+
* Incorporated many hyphenation pattern files from CTAN.
|
data/License.rdoc
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
== License
|
2
|
+
|
3
|
+
Licensing for Text::Hyphen is unfortunately complex because of the various
|
4
|
+
copyrights and licenses of the source hyphenation files that have been
|
5
|
+
converted to Ruby format. Some of these files are available only under the TeX
|
6
|
+
license and others are available only under the GNU GPL while others are public
|
7
|
+
domain. Each language file has these licenses embedded within the file. Please
|
8
|
+
consult each file's license to ensure that it is compatible with your
|
9
|
+
application.
|
10
|
+
|
11
|
+
The Text::Hyphen library software, the application ruby-hyphen, and the library
|
12
|
+
(gem) as a compilation are licensed under the terms of the MIT license. The
|
13
|
+
files in this distribution covered by this license are in the list below called
|
14
|
+
"Library Files".
|
15
|
+
|
16
|
+
Individual language hyphenation files (in the list called "Language Files") are
|
17
|
+
maintained under the license described in the language file itself; the
|
18
|
+
copyright for these original files is held by the original authors; any
|
19
|
+
mistakes made in conversion of these files to Ruby are attributable to the
|
20
|
+
contributors of the Text::Hyphen package only. If license information is not
|
21
|
+
present in a given Language File, it should be considered under the terms of
|
22
|
+
TeX.
|
23
|
+
|
24
|
+
=== Library License
|
25
|
+
* Copyright Austin Ziegler, 2004–2011.
|
26
|
+
|
27
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
28
|
+
this software and associated documentation files (the "Software"), to deal in
|
29
|
+
the Software without restriction, including without limitation the rights to
|
30
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
31
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
32
|
+
so, subject to the following conditions:
|
33
|
+
|
34
|
+
The above copyright notice and this permission notice shall be included in all
|
35
|
+
copies or substantial portions of the Software.
|
36
|
+
|
37
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
38
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
39
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
40
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
41
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
42
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
43
|
+
SOFTWARE.
|
44
|
+
|
45
|
+
The copyright on the Text::Hyphen application/library and the Ruby
|
46
|
+
translations of hyphenation files belongs to Austin Ziegler. All other
|
47
|
+
copyrights on original versions still stand; Text::Hyphen is a derivative work
|
48
|
+
of these and other projects.
|
49
|
+
|
50
|
+
=== Library Files
|
51
|
+
Note that while this list appears to include language files, these are "loader"
|
52
|
+
files only and do not contain the hyphenation patterns themselves.
|
53
|
+
|
54
|
+
* lib/text-hyphen.rb
|
55
|
+
* lib/text/hyphen.rb
|
56
|
+
* lib/text/hyphen/language.rb
|
57
|
+
* lib/text/hyphen/language/ca.rb
|
58
|
+
* lib/text/hyphen/language/cs.rb
|
59
|
+
* lib/text/hyphen/language/da.rb
|
60
|
+
* lib/text/hyphen/language/de.rb
|
61
|
+
* lib/text/hyphen/language/de1.rb
|
62
|
+
* lib/text/hyphen/language/de2.rb
|
63
|
+
* lib/text/hyphen/language/en_uk.rb
|
64
|
+
* lib/text/hyphen/language/en_us.rb
|
65
|
+
* lib/text/hyphen/language/es.rb
|
66
|
+
* lib/text/hyphen/language/et.rb
|
67
|
+
* lib/text/hyphen/language/eu.rb
|
68
|
+
* lib/text/hyphen/language/fi.rb
|
69
|
+
* lib/text/hyphen/language/fr.rb
|
70
|
+
* lib/text/hyphen/language/ga.rb
|
71
|
+
* lib/text/hyphen/language/hr.rb
|
72
|
+
* lib/text/hyphen/language/hsb.rb
|
73
|
+
* lib/text/hyphen/language/hu.rb
|
74
|
+
* lib/text/hyphen/language/hu1.rb
|
75
|
+
* lib/text/hyphen/language/hu2.rb
|
76
|
+
* lib/text/hyphen/language/ia.rb
|
77
|
+
* lib/text/hyphen/language/id.rb
|
78
|
+
* lib/text/hyphen/language/is.rb
|
79
|
+
* lib/text/hyphen/language/it.rb
|
80
|
+
* lib/text/hyphen/language/la.rb
|
81
|
+
* lib/text/hyphen/language/mn.rb
|
82
|
+
* lib/text/hyphen/language/ms.rb
|
83
|
+
* lib/text/hyphen/language/nl.rb
|
84
|
+
* lib/text/hyphen/language/no.rb
|
85
|
+
* lib/text/hyphen/language/no1.rb
|
86
|
+
* lib/text/hyphen/language/no2.rb
|
87
|
+
* lib/text/hyphen/language/pl.rb
|
88
|
+
* lib/text/hyphen/language/pt.rb
|
89
|
+
* lib/text/hyphen/language/sv.rb
|
90
|
+
* test/data/bug_9807_latin1.rb
|
91
|
+
* test/data/bug_9807_utf-8.rb
|
92
|
+
* test/test_bugs.rb
|
93
|
+
* test/test_text_hyphen.rb
|
94
|
+
* text-hyphen.gemspec
|
95
|
+
* History.rdoc
|
96
|
+
* License.rdoc
|
97
|
+
* README.rdoc
|
98
|
+
* Rakefile
|
99
|
+
* bin/ruby-hyphen
|
100
|
+
|
101
|
+
=== Language Files
|
102
|
+
* lib/text/hyphen/language/1.8/ca.rb
|
103
|
+
* lib/text/hyphen/language/1.8/cs.rb
|
104
|
+
* lib/text/hyphen/language/1.8/da.rb
|
105
|
+
* lib/text/hyphen/language/1.8/de1.rb
|
106
|
+
* lib/text/hyphen/language/1.8/de2.rb
|
107
|
+
* lib/text/hyphen/language/1.8/en_uk.rb
|
108
|
+
* lib/text/hyphen/language/1.8/en_us.rb
|
109
|
+
* lib/text/hyphen/language/1.8/es.rb
|
110
|
+
* lib/text/hyphen/language/1.8/et.rb
|
111
|
+
* lib/text/hyphen/language/1.8/eu.rb
|
112
|
+
* lib/text/hyphen/language/1.8/fi.rb
|
113
|
+
* lib/text/hyphen/language/1.8/fr.rb
|
114
|
+
* lib/text/hyphen/language/1.8/ga.rb
|
115
|
+
* lib/text/hyphen/language/1.8/hr.rb
|
116
|
+
* lib/text/hyphen/language/1.8/hsb.rb
|
117
|
+
* lib/text/hyphen/language/1.8/hu1.rb
|
118
|
+
* lib/text/hyphen/language/1.8/hu2.rb
|
119
|
+
* lib/text/hyphen/language/1.8/ia.rb
|
120
|
+
* lib/text/hyphen/language/1.8/id.rb
|
121
|
+
* lib/text/hyphen/language/1.8/is.rb
|
122
|
+
* lib/text/hyphen/language/1.8/it.rb
|
123
|
+
* lib/text/hyphen/language/1.8/la.rb
|
124
|
+
* lib/text/hyphen/language/1.8/mn.rb
|
125
|
+
* lib/text/hyphen/language/1.8/nl.rb
|
126
|
+
* lib/text/hyphen/language/1.8/no1.rb
|
127
|
+
* lib/text/hyphen/language/1.8/no2.rb
|
128
|
+
* lib/text/hyphen/language/1.8/pl.rb
|
129
|
+
* lib/text/hyphen/language/1.8/pt.rb
|
130
|
+
* lib/text/hyphen/language/1.8/sv.rb
|
131
|
+
* lib/text/hyphen/language/1.9/ca.rb
|
132
|
+
* lib/text/hyphen/language/1.9/cs.rb
|
133
|
+
* lib/text/hyphen/language/1.9/da.rb
|
134
|
+
* lib/text/hyphen/language/1.9/de1.rb
|
135
|
+
* lib/text/hyphen/language/1.9/de2.rb
|
136
|
+
* lib/text/hyphen/language/1.9/en_uk.rb
|
137
|
+
* lib/text/hyphen/language/1.9/en_us.rb
|
138
|
+
* lib/text/hyphen/language/1.9/es.rb
|
139
|
+
* lib/text/hyphen/language/1.9/et.rb
|
140
|
+
* lib/text/hyphen/language/1.9/eu.rb
|
141
|
+
* lib/text/hyphen/language/1.9/fi.rb
|
142
|
+
* lib/text/hyphen/language/1.9/fr.rb
|
143
|
+
* lib/text/hyphen/language/1.9/ga.rb
|
144
|
+
* lib/text/hyphen/language/1.9/hr.rb
|
145
|
+
* lib/text/hyphen/language/1.9/hsb.rb
|
146
|
+
* lib/text/hyphen/language/1.9/hu1.rb
|
147
|
+
* lib/text/hyphen/language/1.9/hu2.rb
|
148
|
+
* lib/text/hyphen/language/1.9/ia.rb
|
149
|
+
* lib/text/hyphen/language/1.9/id.rb
|
150
|
+
* lib/text/hyphen/language/1.9/is.rb
|
151
|
+
* lib/text/hyphen/language/1.9/it.rb
|
152
|
+
* lib/text/hyphen/language/1.9/la.rb
|
153
|
+
* lib/text/hyphen/language/1.9/mn.rb
|
154
|
+
* lib/text/hyphen/language/1.9/nl.rb
|
155
|
+
* lib/text/hyphen/language/1.9/no1.rb
|
156
|
+
* lib/text/hyphen/language/1.9/no2.rb
|
157
|
+
* lib/text/hyphen/language/1.9/pl.rb
|
158
|
+
* lib/text/hyphen/language/1.9/pt.rb
|
159
|
+
* lib/text/hyphen/language/1.9/sv.rb
|
data/Manifest.txt
CHANGED
@@ -1,14 +1,71 @@
|
|
1
1
|
.autotest
|
2
|
-
|
3
|
-
|
4
|
-
LICENCE.txt
|
2
|
+
History.rdoc
|
3
|
+
License.rdoc
|
5
4
|
Manifest.txt
|
6
|
-
README.txt
|
5
|
+
README.rdoc
|
7
6
|
Rakefile
|
8
|
-
bin/hyphen
|
7
|
+
bin/ruby-hyphen
|
9
8
|
lib/text-hyphen.rb
|
10
9
|
lib/text/hyphen.rb
|
11
10
|
lib/text/hyphen/language.rb
|
11
|
+
lib/text/hyphen/language/1.8/ca.rb
|
12
|
+
lib/text/hyphen/language/1.8/cs.rb
|
13
|
+
lib/text/hyphen/language/1.8/da.rb
|
14
|
+
lib/text/hyphen/language/1.8/de1.rb
|
15
|
+
lib/text/hyphen/language/1.8/de2.rb
|
16
|
+
lib/text/hyphen/language/1.8/en_uk.rb
|
17
|
+
lib/text/hyphen/language/1.8/en_us.rb
|
18
|
+
lib/text/hyphen/language/1.8/es.rb
|
19
|
+
lib/text/hyphen/language/1.8/et.rb
|
20
|
+
lib/text/hyphen/language/1.8/eu.rb
|
21
|
+
lib/text/hyphen/language/1.8/fi.rb
|
22
|
+
lib/text/hyphen/language/1.8/fr.rb
|
23
|
+
lib/text/hyphen/language/1.8/ga.rb
|
24
|
+
lib/text/hyphen/language/1.8/hr.rb
|
25
|
+
lib/text/hyphen/language/1.8/hsb.rb
|
26
|
+
lib/text/hyphen/language/1.8/hu1.rb
|
27
|
+
lib/text/hyphen/language/1.8/hu2.rb
|
28
|
+
lib/text/hyphen/language/1.8/ia.rb
|
29
|
+
lib/text/hyphen/language/1.8/id.rb
|
30
|
+
lib/text/hyphen/language/1.8/is.rb
|
31
|
+
lib/text/hyphen/language/1.8/it.rb
|
32
|
+
lib/text/hyphen/language/1.8/la.rb
|
33
|
+
lib/text/hyphen/language/1.8/mn.rb
|
34
|
+
lib/text/hyphen/language/1.8/nl.rb
|
35
|
+
lib/text/hyphen/language/1.8/no1.rb
|
36
|
+
lib/text/hyphen/language/1.8/no2.rb
|
37
|
+
lib/text/hyphen/language/1.8/pl.rb
|
38
|
+
lib/text/hyphen/language/1.8/pt.rb
|
39
|
+
lib/text/hyphen/language/1.8/sv.rb
|
40
|
+
lib/text/hyphen/language/1.9/ca.rb
|
41
|
+
lib/text/hyphen/language/1.9/cs.rb
|
42
|
+
lib/text/hyphen/language/1.9/da.rb
|
43
|
+
lib/text/hyphen/language/1.9/de1.rb
|
44
|
+
lib/text/hyphen/language/1.9/de2.rb
|
45
|
+
lib/text/hyphen/language/1.9/en_uk.rb
|
46
|
+
lib/text/hyphen/language/1.9/en_us.rb
|
47
|
+
lib/text/hyphen/language/1.9/es.rb
|
48
|
+
lib/text/hyphen/language/1.9/et.rb
|
49
|
+
lib/text/hyphen/language/1.9/eu.rb
|
50
|
+
lib/text/hyphen/language/1.9/fi.rb
|
51
|
+
lib/text/hyphen/language/1.9/fr.rb
|
52
|
+
lib/text/hyphen/language/1.9/ga.rb
|
53
|
+
lib/text/hyphen/language/1.9/hr.rb
|
54
|
+
lib/text/hyphen/language/1.9/hsb.rb
|
55
|
+
lib/text/hyphen/language/1.9/hu1.rb
|
56
|
+
lib/text/hyphen/language/1.9/hu2.rb
|
57
|
+
lib/text/hyphen/language/1.9/ia.rb
|
58
|
+
lib/text/hyphen/language/1.9/id.rb
|
59
|
+
lib/text/hyphen/language/1.9/is.rb
|
60
|
+
lib/text/hyphen/language/1.9/it.rb
|
61
|
+
lib/text/hyphen/language/1.9/la.rb
|
62
|
+
lib/text/hyphen/language/1.9/mn.rb
|
63
|
+
lib/text/hyphen/language/1.9/nl.rb
|
64
|
+
lib/text/hyphen/language/1.9/no1.rb
|
65
|
+
lib/text/hyphen/language/1.9/no2.rb
|
66
|
+
lib/text/hyphen/language/1.9/pl.rb
|
67
|
+
lib/text/hyphen/language/1.9/pt.rb
|
68
|
+
lib/text/hyphen/language/1.9/sv.rb
|
12
69
|
lib/text/hyphen/language/ca.rb
|
13
70
|
lib/text/hyphen/language/cs.rb
|
14
71
|
lib/text/hyphen/language/da.rb
|
@@ -25,6 +82,7 @@ lib/text/hyphen/language/fr.rb
|
|
25
82
|
lib/text/hyphen/language/ga.rb
|
26
83
|
lib/text/hyphen/language/hr.rb
|
27
84
|
lib/text/hyphen/language/hsb.rb
|
85
|
+
lib/text/hyphen/language/hu.rb
|
28
86
|
lib/text/hyphen/language/hu1.rb
|
29
87
|
lib/text/hyphen/language/hu2.rb
|
30
88
|
lib/text/hyphen/language/ia.rb
|
@@ -33,12 +91,16 @@ lib/text/hyphen/language/is.rb
|
|
33
91
|
lib/text/hyphen/language/it.rb
|
34
92
|
lib/text/hyphen/language/la.rb
|
35
93
|
lib/text/hyphen/language/mn.rb
|
94
|
+
lib/text/hyphen/language/ms.rb
|
36
95
|
lib/text/hyphen/language/nl.rb
|
96
|
+
lib/text/hyphen/language/no.rb
|
37
97
|
lib/text/hyphen/language/no1.rb
|
38
98
|
lib/text/hyphen/language/no2.rb
|
39
99
|
lib/text/hyphen/language/pl.rb
|
40
100
|
lib/text/hyphen/language/pt.rb
|
41
101
|
lib/text/hyphen/language/sv.rb
|
102
|
+
test/data/bug_9807_latin1.rb
|
103
|
+
test/data/bug_9807_utf-8.rb
|
42
104
|
test/test_bugs.rb
|
43
105
|
test/test_text_hyphen.rb
|
44
106
|
text-hyphen.gemspec
|
data/README.rdoc
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
= text-hyphen
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
Text::Hyphen is a Ruby library to hyphenate words in various languages using
|
6
|
+
Ruby-fied versions of TeX hyphenation patterns. It will properly hyphenate
|
7
|
+
various words according to the rules of the language the word is written in.
|
8
|
+
The algorithm is based on that of the TeX typesetting system by Donald E.
|
9
|
+
Knuth.
|
10
|
+
|
11
|
+
This is originally based on the Perl implementation of
|
12
|
+
{TeX::Hyphen}[http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
|
13
|
+
and the {Ruby port}[http://rubyforge.org/projects/text-format]. The language
|
14
|
+
hyphenation pattern files are based on the sources available from
|
15
|
+
{CTAN}[http://www.ctan.org] as of 2004.12.19 and have been manually translated
|
16
|
+
by Austin Ziegler.
|
17
|
+
|
18
|
+
This release is 1.2. This is a major release providing both Ruby 1.8.7 and Ruby
|
19
|
+
1.9.2 support. This is the last major release supporting Ruby 1.8 interpreters.
|
20
|
+
Future versions will only work with Ruby 1.9 or later interpreters.
|
21
|
+
|
22
|
+
== Where
|
23
|
+
|
24
|
+
* {RubyForge}[http://rubyforge.org/projects/text-format/]
|
25
|
+
* {RubyGems}[https://rubygems.org/gems/text-hyphen]
|
26
|
+
* {GitHub}[https://github.com/halostatue/text-hyphen/]
|
27
|
+
* {RDoc}[http://rdoc.info/github/halostatue/text-hyphen/master/frames]
|
28
|
+
|
29
|
+
== Synopsis
|
30
|
+
|
31
|
+
require 'text/hyphen'
|
32
|
+
hyp = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
|
33
|
+
# Defaults to the above
|
34
|
+
hyp = Text::Hyphen.new
|
35
|
+
|
36
|
+
word = "representation"
|
37
|
+
points = hyp.hyphenate(word) #=> [3, 5, 8, 10]
|
38
|
+
puts hyp.visualize(word) #=> rep-re-sen-ta-tion
|
39
|
+
|
40
|
+
Text::Hyphen is truly multilingual, with 29 languages or language variants
|
41
|
+
supported. As an example, consider the difference between the following:
|
42
|
+
|
43
|
+
require 'text/hyphen'
|
44
|
+
# Using left and right minimum values of 0 ensures that you will see all
|
45
|
+
# possible hyphenation points, not just those that meet the minimum width
|
46
|
+
# requirements.
|
47
|
+
en = Text::Hyphen.new(:left => 0, :right => 0)
|
48
|
+
fr = Text::Hyphen.new(:language => "fr", :left => 0, :right => 0)
|
49
|
+
|
50
|
+
puts en.visualise("organiser") #=> or-gan-iser
|
51
|
+
puts fr.visualise("organiser") #=> or-ga-ni-ser
|
52
|
+
|
53
|
+
As you can see, the hyphenation is distinct between the two hyphenators.
|
54
|
+
Additional improvements over TeX::Hyphen include thread safety (except for
|
55
|
+
debug control) and support for UTF-8 under Ruby 1.9.
|
56
|
+
|
57
|
+
== Install
|
58
|
+
gem install text-hyphen
|
59
|
+
|
60
|
+
== Developers
|
61
|
+
|
62
|
+
After checking out the source, run:
|
63
|
+
|
64
|
+
$ rake newb
|
65
|
+
|
66
|
+
This task will install any missing dependencies, run the tests/specs,
|
67
|
+
and generate the RDoc.
|
68
|
+
|
69
|
+
:include: License.rdoc
|
data/Rakefile
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
# -*- ruby -*-
|
1
|
+
# -*- ruby encoding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'hoe'
|
5
|
-
require 'rubyforge'
|
6
5
|
|
7
6
|
Hoe.plugin :doofus
|
8
7
|
Hoe.plugin :gemspec
|
9
8
|
Hoe.plugin :git
|
10
9
|
Hoe.plugin :rubyforge
|
11
10
|
|
12
|
-
Hoe.spec 'text-hyphen' do
|
11
|
+
Hoe.spec 'text-hyphen' do |spec|
|
13
12
|
developer('Austin Ziegler', 'austin@rubyforge.org')
|
13
|
+
|
14
14
|
self.rubyforge_name = 'text-format'
|
15
|
+
spec.remote_rdoc_dir = 'text-hyphen/rdoc'
|
16
|
+
spec.rsync_args << ' --exclude=statsvn/'
|
15
17
|
|
16
|
-
|
18
|
+
spec.history_file = 'History.rdoc'
|
19
|
+
spec.readme_file = 'README.rdoc'
|
20
|
+
spec.extra_rdoc_files = FileList["*.rdoc"].to_a
|
17
21
|
|
18
22
|
self.extra_dev_deps << ['hoe-doofus', '~> 1.0']
|
19
23
|
self.extra_dev_deps << ['hoe-gemspec', '~> 1.0']
|
data/bin/{hyphen → ruby-hyphen}
RENAMED
File without changes
|
data/lib/text-hyphen.rb
CHANGED
data/lib/text/hyphen.rb
CHANGED
@@ -1,95 +1,26 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# require 'text/hyphen'
|
9
|
-
# hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
|
10
|
-
# # Defaults to the above
|
11
|
-
# hh = TeX::Hyphen.new
|
12
|
-
#
|
13
|
-
# word = "representation"
|
14
|
-
# points = hyp.hyphenate(word) #=> [3, 5, 8, 10]
|
15
|
-
# puts hyp.visualize(word) #=> rep-re-sen-ta-tion
|
16
|
-
#
|
17
|
-
# en = Text::Hyphen.new(:left => 0, :right => 0)
|
18
|
-
# fr = Text::Hyphen.new(:language = "fr", :left => 0, :right => 0)
|
19
|
-
# puts en.visualise("organiser") #=> or-gan-iser
|
20
|
-
# puts fr.visualise("organiser") #=> or-ga-ni-ser
|
21
|
-
#
|
22
|
-
# == Description
|
23
|
-
# Creates a new Hyphen object and loads the language patterns into memory.
|
24
|
-
# The hyphenator can then be asked for the hyphenation of a word. If no
|
25
|
-
# language is specified, then the language en_us (EN_US) is used by default.
|
26
|
-
#
|
27
|
-
# Copyright:: Copyright (c) 2004 - 2005 Austin Ziegler
|
28
|
-
# Version:: 1.0.2
|
29
|
-
# Based On:: <tt>TeX::Hyphen</tt> 0.4 Copyright (c) 2003 - 2004
|
30
|
-
# Martin DeMello and Austin Ziegler, in turn based on
|
31
|
-
# Perl's <tt>TeX::Hyphen</tt>
|
32
|
-
# [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
|
33
|
-
# Copyright (c) 1997 - 2002 Jan Pazdziora
|
34
|
-
#
|
35
|
-
# == Licence
|
36
|
-
# Licensing for Text::Hyphen is unfortunately complex because of the various
|
37
|
-
# copyrights and licences of the source hyphenation files. Some of these
|
38
|
-
# files are available only under the TeX licence and others are available
|
39
|
-
# only under the GNU GPL while others are public domain. Each language file
|
40
|
-
# has these licences embedded within the file. Please consult each file's
|
41
|
-
# licence to ensure that it is compatible with your application.
|
42
|
-
#
|
43
|
-
# The copyright on the Text::Hyphen application/library and the Ruby
|
44
|
-
# translations of hyphenation files belongs to Austin Ziegler. All other
|
45
|
-
# copyrights on original versions still stand; Text::Hyphen is a derivative
|
46
|
-
# work of these and other projects.
|
47
|
-
#
|
48
|
-
# === Application and Compilation Licences
|
49
|
-
# Text::Hyphen, the application/library is licensed under the same terms as
|
50
|
-
# Ruby. Note that this specifically refers to the contents of bin/hyphen,
|
51
|
-
# lib/text/hyphen.rb, and lib/text/hyphen/language.rb.
|
52
|
-
#
|
53
|
-
# Individual language hyphenation files are NOT licensed under these terms,
|
54
|
-
# but under the following MIT-style licence and the original hyphenation
|
55
|
-
# pattern licenses. The copyright for the original TeX hyphenation files is
|
56
|
-
# held by the original authors; any mistakes in conversion of these files to
|
57
|
-
# Ruby is attributable to the contributors to the Text::Hyphen package only.
|
58
|
-
#
|
59
|
-
# The compilation package Text::Hyphen is licensed under the same terms as
|
60
|
-
# Ruby.
|
61
|
-
#
|
62
|
-
# === Blanket Language Hyphenation File Licence
|
63
|
-
# Permission is hereby granted, free of charge, to any person obtaining a
|
64
|
-
# copy of this software and associated documentation files (the "Software"),
|
65
|
-
# to deal in the Software without restriction, including without limitation
|
66
|
-
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
67
|
-
# and/or sell copies of the Software, and to permit persons to whom the
|
68
|
-
# Software is furnished to do so, subject to the following conditions:
|
69
|
-
#
|
70
|
-
# The above copyright notice and this permission notice shall be included in
|
71
|
-
# all copies or substantial portions of the Software.
|
72
|
-
#
|
73
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
74
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
75
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
76
|
-
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
77
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
78
|
-
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
79
|
-
# DEALINGS IN THE SOFTWARE.
|
1
|
+
# -*- ruby encoding: utf-8 -*-
|
2
|
+
module Text # :nodoc:
|
3
|
+
end
|
4
|
+
|
5
|
+
# An object that knows how to perform hyphenation based on the TeX
|
6
|
+
# hyphenation algorithm with pattern files. Each object is constructed with
|
7
|
+
# a specific language's hyphenation patterns.
|
80
8
|
class Text::Hyphen
|
81
9
|
DEBUG = false
|
82
|
-
VERSION = '1.0.2'
|
10
|
+
VERSION = '1.2'
|
83
11
|
|
84
12
|
DEFAULT_MIN_LEFT = 2
|
85
13
|
DEFAULT_MIN_RIGHT = 2
|
86
14
|
|
87
15
|
# No fewer than this number of letters will show up to the left of the
|
88
|
-
# hyphen.
|
16
|
+
# hyphen. The initial value for this will be specified by the language;
|
17
|
+
# setting this value will override the language's defaults.
|
89
18
|
attr_accessor :left
|
19
|
+
|
90
20
|
# No fewer than this number of letters will show up to the right of the
|
91
21
|
# hyphen. This overrides the default specified in the language.
|
92
22
|
attr_accessor :right
|
23
|
+
|
93
24
|
# The name of the language to be used in hyphenating words. This will be a
|
94
25
|
# two or three character ISO 639 code, with the two character form being
|
95
26
|
# the canonical resource name. This will load the language hyphenation
|
@@ -101,14 +32,15 @@ class Text::Hyphen
|
|
101
32
|
# Minimal transformations will be performed on the language code provided,
|
102
33
|
# such that any dashes are converted to underscores (e.g., 'en-us' becomes
|
103
34
|
# 'en_us') and all characters are regularised. Resource names will be
|
104
|
-
# downcased and class names will be
|
105
|
-
# language becomes 'pt' and 'PT', respectively).
|
35
|
+
# downcased and class names will be converted to uppercase (e.g., 'Pt' for
|
36
|
+
# the Portuguese language becomes 'pt' and 'PT', respectively).
|
106
37
|
#
|
107
38
|
# The language may also be specified as an instance of
|
108
39
|
# Text::Hyphen::Language.
|
109
40
|
attr_accessor :language
|
41
|
+
|
110
42
|
undef :language=
|
111
|
-
def language=(lang)
|
43
|
+
def language=(lang) #:nodoc:
|
112
44
|
require 'text/hyphen/language' unless defined?(Text::Hyphen::Language)
|
113
45
|
if lang.kind_of? Text::Hyphen::Language
|
114
46
|
@iso_language = lang.to_s.split(%r{::}o)[-1].downcase
|
@@ -119,13 +51,27 @@ class Text::Hyphen
|
|
119
51
|
end
|
120
52
|
@iso_language
|
121
53
|
end
|
54
|
+
|
122
55
|
# Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
|
123
|
-
attr_reader
|
56
|
+
attr_reader :iso_language
|
124
57
|
|
125
|
-
#
|
58
|
+
# Creates a hyphenation object with the options requested. The options
|
59
|
+
# available are:
|
126
60
|
#
|
127
|
-
#
|
128
|
-
#
|
61
|
+
# language:: The language to perform hyphenation with. See #language and
|
62
|
+
# #iso_language.
|
63
|
+
# left:: The minimum number of characters to the left of a
|
64
|
+
# hyphenation point. See #left.
|
65
|
+
# right:: The minimum number of characters to the right of a
|
66
|
+
# hyphenation point. See #right.
|
67
|
+
#
|
68
|
+
# The options can be provided either as hashed parameters or set as
|
69
|
+
# methods in an initialization block. The following initializations are
|
70
|
+
# all equivalent:
|
71
|
+
#
|
72
|
+
# hyp = Text::Hyphen.new(:language => 'en_us')
|
73
|
+
# hyp = Text::Hyphen.new(language: 'en_us') # under Ruby 1.9
|
74
|
+
# hyp = Text::Hyphen.new { |h| h.language = 'en_us' }
|
129
75
|
def initialize(options = {}) # :yields self:
|
130
76
|
@iso_language = options[:language]
|
131
77
|
@left = options[:left]
|
@@ -147,16 +93,16 @@ class Text::Hyphen
|
|
147
93
|
|
148
94
|
load_language
|
149
95
|
|
150
|
-
@left
|
151
|
-
@right
|
96
|
+
@left ||= DEFAULT_MIN_LEFT
|
97
|
+
@right ||= DEFAULT_MIN_RIGHT
|
152
98
|
end
|
153
99
|
|
154
|
-
# Returns
|
100
|
+
# Returns an array of character positions where a word can be hyphenated.
|
155
101
|
#
|
156
|
-
# hyp.hyphenate('representation')
|
102
|
+
# hyp.hyphenate('representation') #=> [3, 5, 8, 10]
|
157
103
|
#
|
158
|
-
#
|
159
|
-
# will be returned from a per-instance cache.
|
104
|
+
# Because hyphenation can be expensive, if the word has been hyphenated
|
105
|
+
# previously, it will be returned from a per-instance cache.
|
160
106
|
def hyphenate(word)
|
161
107
|
word = word.downcase
|
162
108
|
$stderr.puts "Hyphenating #{word}" if DEBUG
|
@@ -164,28 +110,32 @@ class Text::Hyphen
|
|
164
110
|
res = @language.exceptions[word]
|
165
111
|
return @cache[word] = make_result_list(res) if res
|
166
112
|
|
167
|
-
|
168
|
-
|
113
|
+
letters = word.scan(@language.scan_re)
|
114
|
+
$stderr.puts letters.inspect if DEBUG
|
115
|
+
word_size = letters.size
|
116
|
+
|
117
|
+
result = [0] * (word_size + 1)
|
118
|
+
right_stop = word_size - @right
|
169
119
|
|
170
120
|
updater = Proc.new do |hash, str, pos|
|
171
121
|
if hash.has_key?(str)
|
172
122
|
$stderr.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
|
173
|
-
hash[str].
|
123
|
+
hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
|
174
124
|
cc = cc.to_i
|
175
125
|
result[ii + pos] = cc if cc > result[ii + pos]
|
176
126
|
end
|
177
|
-
$stderr.print ": #{result}\n" if DEBUG
|
127
|
+
$stderr.print ": #{result.inspect}\n" if DEBUG
|
178
128
|
end
|
179
129
|
end
|
180
130
|
|
181
131
|
# Walk the word
|
182
|
-
(0..
|
183
|
-
|
184
|
-
(1..
|
185
|
-
substr =
|
132
|
+
(0..right_stop).each do |pos|
|
133
|
+
rest_length = word_size - pos
|
134
|
+
(1..rest_length).each do |length|
|
135
|
+
substr = letters[pos, length].join('')
|
186
136
|
updater[@language.hyphen, substr, pos]
|
187
137
|
updater[@language.start, substr, pos] if pos.zero?
|
188
|
-
updater[@language.stop, substr, pos] if (length ==
|
138
|
+
updater[@language.stop, substr, pos] if (length == rest_length)
|
189
139
|
end
|
190
140
|
end
|
191
141
|
|
@@ -196,23 +146,23 @@ class Text::Hyphen
|
|
196
146
|
@cache[word] = make_result_list(result)
|
197
147
|
end
|
198
148
|
|
199
|
-
# Returns a visualization of the hyphenation points
|
149
|
+
# Returns a visualization of the hyphenation points.
|
200
150
|
#
|
201
|
-
# hyp.visualize('representation')
|
151
|
+
# hyp.visualize('representation') #=> rep-re-sen-ta-tion
|
202
152
|
#
|
203
|
-
#
|
204
|
-
#
|
205
|
-
# per-instance cache.
|
153
|
+
# Because hyphenation can be expensive, if the word has been visualised
|
154
|
+
# previously, it will be returned from a per-instance cache.
|
206
155
|
def visualise(word)
|
207
156
|
return @vcache[word] if @vcache.has_key?(word)
|
208
157
|
w = word.dup
|
209
|
-
hyphenate(w).each_with_index do |pos, n|
|
158
|
+
hyphenate(w).each_with_index do |pos, n|
|
210
159
|
w[pos.to_i + n, 0] = '-' if pos != 0
|
211
160
|
end
|
212
161
|
@vcache[word] = w
|
213
162
|
end
|
214
163
|
alias visualize visualise
|
215
164
|
|
165
|
+
# Clears the per-instance hyphenation and visualization caches.
|
216
166
|
def clear_cache!
|
217
167
|
@cache.clear
|
218
168
|
@vcache.clear
|
@@ -229,7 +179,8 @@ class Text::Hyphen
|
|
229
179
|
end
|
230
180
|
end
|
231
181
|
|
232
|
-
# Returns
|
182
|
+
# Returns a string describing the structure of the patterns for the
|
183
|
+
# language of this hyphenation object.
|
233
184
|
def stats
|
234
185
|
_b = @language.both.size
|
235
186
|
_s = @language.start.size
|
@@ -254,7 +205,7 @@ EOS
|
|
254
205
|
def updateresult(hash, str, pos)
|
255
206
|
if hash.has_key?(str)
|
256
207
|
STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
|
257
|
-
hash[str].
|
208
|
+
hash[str].scan(@language.scan_re).each_with_index do |c, i|
|
258
209
|
c = c.to_i
|
259
210
|
@result[i + pos] = c if c > @result[i + pos]
|
260
211
|
end
|
@@ -287,4 +238,16 @@ EOS
|
|
287
238
|
@iso_language
|
288
239
|
end
|
289
240
|
private :load_language
|
241
|
+
|
242
|
+
# Resolves a file for cleaner loading from a hyphenation loader file.
|
243
|
+
def self.require_real_hyphenation_file(loader) # :nodoc:
|
244
|
+
p = File.dirname(loader)
|
245
|
+
f = File.basename(loader)
|
246
|
+
v = if RUBY_VERSION < "1.9.1"
|
247
|
+
"1.8"
|
248
|
+
else
|
249
|
+
"1.9"
|
250
|
+
end
|
251
|
+
require File.join(p, v, f)
|
252
|
+
end
|
290
253
|
end
|