text-hyphen 1.0.2 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/.gemtest +0 -0
  2. data/History.rdoc +54 -0
  3. data/License.rdoc +159 -0
  4. data/Manifest.txt +67 -5
  5. data/README.rdoc +69 -0
  6. data/Rakefile +8 -4
  7. data/bin/{hyphen → ruby-hyphen} +0 -0
  8. data/lib/text-hyphen.rb +1 -0
  9. data/lib/text/hyphen.rb +74 -111
  10. data/lib/text/hyphen/language.rb +90 -26
  11. data/lib/text/hyphen/language/1.8/ca.rb +171 -0
  12. data/lib/text/hyphen/language/1.8/cs.rb +360 -0
  13. data/lib/text/hyphen/language/1.8/da.rb +117 -0
  14. data/lib/text/hyphen/language/1.8/de1.rb +718 -0
  15. data/lib/text/hyphen/language/1.8/de2.rb +680 -0
  16. data/lib/text/hyphen/language/1.8/en_uk.rb +789 -0
  17. data/lib/text/hyphen/language/1.8/en_us.rb +490 -0
  18. data/lib/text/hyphen/language/1.8/es.rb +287 -0
  19. data/lib/text/hyphen/language/1.8/et.rb +335 -0
  20. data/lib/text/hyphen/language/1.8/eu.rb +112 -0
  21. data/lib/text/hyphen/language/1.8/fi.rb +112 -0
  22. data/lib/text/hyphen/language/1.8/fr.rb +389 -0
  23. data/lib/text/hyphen/language/1.8/ga.rb +606 -0
  24. data/lib/text/hyphen/language/1.8/hr.rb +122 -0
  25. data/lib/text/hyphen/language/1.8/hsb.rb +179 -0
  26. data/lib/text/hyphen/language/1.8/hu1.rb +380 -0
  27. data/lib/text/hyphen/language/1.8/hu2.rb +1278 -0
  28. data/lib/text/hyphen/language/1.8/ia.rb +71 -0
  29. data/lib/text/hyphen/language/1.8/id.rb +91 -0
  30. data/lib/text/hyphen/language/1.8/is.rb +387 -0
  31. data/lib/text/hyphen/language/1.8/it.rb +133 -0
  32. data/lib/text/hyphen/language/1.8/la.rb +132 -0
  33. data/lib/text/hyphen/language/1.8/mn.rb +101 -0
  34. data/lib/text/hyphen/language/1.8/nl.rb +1250 -0
  35. data/lib/text/hyphen/language/1.8/no1.rb +299 -0
  36. data/lib/text/hyphen/language/1.8/no2.rb +134 -0
  37. data/lib/text/hyphen/language/1.8/pl.rb +478 -0
  38. data/lib/text/hyphen/language/1.8/pt.rb +54 -0
  39. data/lib/text/hyphen/language/1.8/sv.rb +447 -0
  40. data/lib/text/hyphen/language/1.9/ca.rb +174 -0
  41. data/lib/text/hyphen/language/1.9/cs.rb +361 -0
  42. data/lib/text/hyphen/language/1.9/da.rb +117 -0
  43. data/lib/text/hyphen/language/1.9/de1.rb +719 -0
  44. data/lib/text/hyphen/language/1.9/de2.rb +682 -0
  45. data/lib/text/hyphen/language/1.9/en_uk.rb +791 -0
  46. data/lib/text/hyphen/language/1.9/en_us.rb +492 -0
  47. data/lib/text/hyphen/language/1.9/es.rb +289 -0
  48. data/lib/text/hyphen/language/1.9/et.rb +336 -0
  49. data/lib/text/hyphen/language/1.9/eu.rb +114 -0
  50. data/lib/text/hyphen/language/1.9/fi.rb +113 -0
  51. data/lib/text/hyphen/language/1.9/fr.rb +391 -0
  52. data/lib/text/hyphen/language/1.9/ga.rb +608 -0
  53. data/lib/text/hyphen/language/1.9/hr.rb +123 -0
  54. data/lib/text/hyphen/language/1.9/hsb.rb +180 -0
  55. data/lib/text/hyphen/language/1.9/hu1.rb +382 -0
  56. data/lib/text/hyphen/language/1.9/hu2.rb +1280 -0
  57. data/lib/text/hyphen/language/1.9/ia.rb +73 -0
  58. data/lib/text/hyphen/language/1.9/id.rb +93 -0
  59. data/lib/text/hyphen/language/1.9/is.rb +388 -0
  60. data/lib/text/hyphen/language/1.9/it.rb +134 -0
  61. data/lib/text/hyphen/language/1.9/la.rb +134 -0
  62. data/lib/text/hyphen/language/1.9/mn.rb +102 -0
  63. data/lib/text/hyphen/language/1.9/nl.rb +1252 -0
  64. data/lib/text/hyphen/language/1.9/no1.rb +301 -0
  65. data/lib/text/hyphen/language/1.9/no2.rb +136 -0
  66. data/lib/text/hyphen/language/1.9/pl.rb +479 -0
  67. data/lib/text/hyphen/language/1.9/pt.rb +55 -0
  68. data/lib/text/hyphen/language/1.9/sv.rb +449 -0
  69. data/lib/text/hyphen/language/ca.rb +3 -173
  70. data/lib/text/hyphen/language/cs.rb +3 -362
  71. data/lib/text/hyphen/language/da.rb +3 -117
  72. data/lib/text/hyphen/language/de.rb +1 -0
  73. data/lib/text/hyphen/language/de1.rb +3 -724
  74. data/lib/text/hyphen/language/de2.rb +3 -685
  75. data/lib/text/hyphen/language/en_uk.rb +3 -790
  76. data/lib/text/hyphen/language/en_us.rb +3 -492
  77. data/lib/text/hyphen/language/es.rb +3 -288
  78. data/lib/text/hyphen/language/et.rb +3 -336
  79. data/lib/text/hyphen/language/eu.rb +3 -114
  80. data/lib/text/hyphen/language/fi.rb +3 -112
  81. data/lib/text/hyphen/language/fr.rb +3 -391
  82. data/lib/text/hyphen/language/ga.rb +3 -607
  83. data/lib/text/hyphen/language/hr.rb +3 -123
  84. data/lib/text/hyphen/language/hsb.rb +2 -179
  85. data/lib/text/hyphen/language/hu.rb +1 -0
  86. data/lib/text/hyphen/language/hu1.rb +3 -384
  87. data/lib/text/hyphen/language/hu2.rb +3 -1282
  88. data/lib/text/hyphen/language/ia.rb +3 -72
  89. data/lib/text/hyphen/language/id.rb +3 -96
  90. data/lib/text/hyphen/language/is.rb +3 -389
  91. data/lib/text/hyphen/language/it.rb +3 -134
  92. data/lib/text/hyphen/language/la.rb +3 -133
  93. data/lib/text/hyphen/language/mn.rb +3 -102
  94. data/lib/text/hyphen/language/ms.rb +9 -0
  95. data/lib/text/hyphen/language/nl.rb +3 -1252
  96. data/lib/text/hyphen/language/no.rb +1 -0
  97. data/lib/text/hyphen/language/no1.rb +3 -302
  98. data/lib/text/hyphen/language/no2.rb +3 -137
  99. data/lib/text/hyphen/language/pl.rb +3 -479
  100. data/lib/text/hyphen/language/pt.rb +3 -55
  101. data/lib/text/hyphen/language/sv.rb +3 -448
  102. data/test/data/bug_9807_latin1.rb +10 -0
  103. data/test/data/bug_9807_utf-8.rb +10 -0
  104. data/test/test_bugs.rb +14 -4
  105. data/test/test_text_hyphen.rb +3 -3
  106. data/text-hyphen.gemspec +29 -29
  107. metadata +101 -40
  108. data/COPYING.txt +0 -339
  109. data/History.txt +0 -23
  110. data/LICENCE.txt +0 -47
  111. data/README.txt +0 -82
File without changes
@@ -0,0 +1,54 @@
1
+ == 1.2 / 2011.07.17
2
+ * Major Enhancements:
3
+ * This release supports Ruby 1.9 with UTF-8 encodings. The language files are
4
+ duplicated for both Ruby 1.8 and 1.9 and the correct version is loaded
5
+ based on RUBY\_VERSION.
6
+ * Minor Enhancements:
7
+ * Making Hungarian and Norwegian language files act like the German language
8
+ files (both of these languages have two alternative hyphenation tables).
9
+ * Added a Malay language file that should work correctly.
10
+ * Cleaned up the documentation.
11
+ * Bug Fixes:
12
+ * Fixed 9807 and 28128 (previously noted as not reproducible; quality
13
+ reproduction cases were found).
14
+ * Changes:
15
+ * Relicensing the core library to the MIT license and attempting to clarify
16
+ the license situation.
17
+ * Renaming hyphen to ruby-hyphen.
18
+ * This is the final release compatible with Ruby 1.8, first release
19
+ compatible with Ruby 1.9.
20
+ * Test Coverage:
21
+ * Tested with all major Ruby releases except IronRuby.
22
+ * jruby-1.6.3 (1.8.7): passes all tests.
23
+ * jruby-1.6.3 (1.9.2): fails one UTF-8 related test (see JRUBY-5927).
24
+ * macruby-0.10 (1.9.2): passes all tests. Segfault with "rake test" (see
25
+ https://www.macruby.org/trac/ticket/1362 for more information).
26
+ * maglev-ruby-0.9 (1.8.7): passes all tests.
27
+ * rubinius-1.2.5 (1.8.7): passes all tests.
28
+ * ree-1.8.7-2011.03 (1.8.7): passes all tests.
29
+ * MRI-1.8.7-p352 (1.8.7): passes all tests.
30
+ * MRI-1.9.2-p290 (1.9.2): passes all tests.
31
+
32
+ == 1.0.2 / 2011.02.09
33
+ * Moved to 'hoe' and GitHub.
34
+ * Preparing for 2.0 which will be Ruby 1.9-only for UTF-8.
35
+ * Fixing German support (RubyForge 28498):
36
+ * Choosing 'de' as a language will load 'de1'. Choosing 'de1' or 'de2' will
37
+ load properly now, but they will be reported with an ISO language code of
38
+ 'de' (new optional #isocode attribute on a language definition that will
39
+ override the #iso_language setting of a Text::Hyphen instance if set).
40
+ * Both 'de1' and 'de2' can be loaded simultaneously now, but the first one
41
+ loaded will claim the Text::Hyphen::Language::DE constant.
42
+ * Added test cases for bugs:
43
+ * RubyForge 9807 (cannot reproduce)
44
+ * RubyForge 28128 (cannot reproduce)
45
+ * RubyForge 28498
46
+
47
+ == 1.0.1
48
+ * Minor modification to the RubyGem release of Text::Hyphen to enable the
49
+ hyphen command-line program.
50
+
51
+ == 1.0.0
52
+ * Initial version based on TeX::Hyphen 0.4.0 (some changes have been
53
+ backported to TeX::Hyphen 0.5.0).
54
+ * Incorporated many hyphenation pattern files from CTAN.
@@ -0,0 +1,159 @@
1
+ == License
2
+
3
+ Licensing for Text::Hyphen is unfortunately complex because of the various
4
+ copyrights and licenses of the source hyphenation files that have been
5
+ converted to Ruby format. Some of these files are available only under the TeX
6
+ license and others are available only under the GNU GPL while others are public
7
+ domain. Each language file has these licenses embedded within the file. Please
8
+ consult each file's license to ensure that it is compatible with your
9
+ application.
10
+
11
+ The Text::Hyphen library software, the application ruby-hyphen, and the library
12
+ (gem) as a compilation is licensed under the terms of the MIT license. The
13
+ files in this distribution covered by this license are in the list below called
14
+ "Library Files".
15
+
16
+ Individual language hyphenation files (in the list called "Language Files") are
17
+ maintained under the license described in the language file itself; the
18
+ copyright for these original files is held by the original authors; any
19
+ mistakes made in conversion of these files to Ruby is attributable to the
20
+ contributors of the Text::Hyphen package only. If license information is not
21
+ present in a given Language File, it should be considered under the terms of
22
+ TeX.
23
+
24
+ === Library License
25
+ * Copyright Austin Ziegler, 2004–2011.
26
+
27
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
28
+ this software and associated documentation files (the "Software"), to deal in
29
+ the Software without restriction, including without limitation the rights to
30
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
31
+ of the Software, and to permit persons to whom the Software is furnished to do
32
+ so, subject to the following conditions:
33
+
34
+ The above copyright notice and this permission notice shall be included in all
35
+ copies or substantial portions of the Software.
36
+
37
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43
+ SOFTWARE.
44
+
45
+ The copyright on the Text::Hyphen application/library and the Ruby
46
+ translations of hyphenation files belongs to Austin Ziegler. All other
47
+ copyrights on original versions still stand; Text::Hyphen is a derivative work
48
+ of these and other projects.
49
+
50
+ === Library Files
51
+ Note that while this list appears to include language files, these are "loader"
52
+ files only and do not contain the hyphenation patterns themselves.
53
+
54
+ * lib/text-hyphen.rb
55
+ * lib/text/hyphen.rb
56
+ * lib/text/hyphen/language.rb
57
+ * lib/text/hyphen/language/ca.rb
58
+ * lib/text/hyphen/language/cs.rb
59
+ * lib/text/hyphen/language/da.rb
60
+ * lib/text/hyphen/language/de.rb
61
+ * lib/text/hyphen/language/de1.rb
62
+ * lib/text/hyphen/language/de2.rb
63
+ * lib/text/hyphen/language/en_uk.rb
64
+ * lib/text/hyphen/language/en_us.rb
65
+ * lib/text/hyphen/language/es.rb
66
+ * lib/text/hyphen/language/et.rb
67
+ * lib/text/hyphen/language/eu.rb
68
+ * lib/text/hyphen/language/fi.rb
69
+ * lib/text/hyphen/language/fr.rb
70
+ * lib/text/hyphen/language/ga.rb
71
+ * lib/text/hyphen/language/hr.rb
72
+ * lib/text/hyphen/language/hsb.rb
73
+ * lib/text/hyphen/language/hu.rb
74
+ * lib/text/hyphen/language/hu1.rb
75
+ * lib/text/hyphen/language/hu2.rb
76
+ * lib/text/hyphen/language/ia.rb
77
+ * lib/text/hyphen/language/id.rb
78
+ * lib/text/hyphen/language/is.rb
79
+ * lib/text/hyphen/language/it.rb
80
+ * lib/text/hyphen/language/la.rb
81
+ * lib/text/hyphen/language/mn.rb
82
+ * lib/text/hyphen/language/ms.rb
83
+ * lib/text/hyphen/language/nl.rb
84
+ * lib/text/hyphen/language/no.rb
85
+ * lib/text/hyphen/language/no1.rb
86
+ * lib/text/hyphen/language/no2.rb
87
+ * lib/text/hyphen/language/pl.rb
88
+ * lib/text/hyphen/language/pt.rb
89
+ * lib/text/hyphen/language/sv.rb
90
+ * test/data/bug_9807_latin1.rb
91
+ * test/data/bug_9807_utf-8.rb
92
+ * test/test_bugs.rb
93
+ * test/test_text_hyphen.rb
94
+ * text-hyphen.gemspec
95
+ * History.rdoc
96
+ * License.rdoc
97
+ * README.rdoc
98
+ * Rakefile
99
+ * bin/ruby-hyphen
100
+
101
+ === Language Files
102
+ * lib/text/hyphen/language/1.8/ca.rb
103
+ * lib/text/hyphen/language/1.8/cs.rb
104
+ * lib/text/hyphen/language/1.8/da.rb
105
+ * lib/text/hyphen/language/1.8/de1.rb
106
+ * lib/text/hyphen/language/1.8/de2.rb
107
+ * lib/text/hyphen/language/1.8/en_uk.rb
108
+ * lib/text/hyphen/language/1.8/en_us.rb
109
+ * lib/text/hyphen/language/1.8/es.rb
110
+ * lib/text/hyphen/language/1.8/et.rb
111
+ * lib/text/hyphen/language/1.8/eu.rb
112
+ * lib/text/hyphen/language/1.8/fi.rb
113
+ * lib/text/hyphen/language/1.8/fr.rb
114
+ * lib/text/hyphen/language/1.8/ga.rb
115
+ * lib/text/hyphen/language/1.8/hr.rb
116
+ * lib/text/hyphen/language/1.8/hsb.rb
117
+ * lib/text/hyphen/language/1.8/hu1.rb
118
+ * lib/text/hyphen/language/1.8/hu2.rb
119
+ * lib/text/hyphen/language/1.8/ia.rb
120
+ * lib/text/hyphen/language/1.8/id.rb
121
+ * lib/text/hyphen/language/1.8/is.rb
122
+ * lib/text/hyphen/language/1.8/it.rb
123
+ * lib/text/hyphen/language/1.8/la.rb
124
+ * lib/text/hyphen/language/1.8/mn.rb
125
+ * lib/text/hyphen/language/1.8/nl.rb
126
+ * lib/text/hyphen/language/1.8/no1.rb
127
+ * lib/text/hyphen/language/1.8/no2.rb
128
+ * lib/text/hyphen/language/1.8/pl.rb
129
+ * lib/text/hyphen/language/1.8/pt.rb
130
+ * lib/text/hyphen/language/1.8/sv.rb
131
+ * lib/text/hyphen/language/1.9/ca.rb
132
+ * lib/text/hyphen/language/1.9/cs.rb
133
+ * lib/text/hyphen/language/1.9/da.rb
134
+ * lib/text/hyphen/language/1.9/de1.rb
135
+ * lib/text/hyphen/language/1.9/de2.rb
136
+ * lib/text/hyphen/language/1.9/en_uk.rb
137
+ * lib/text/hyphen/language/1.9/en_us.rb
138
+ * lib/text/hyphen/language/1.9/es.rb
139
+ * lib/text/hyphen/language/1.9/et.rb
140
+ * lib/text/hyphen/language/1.9/eu.rb
141
+ * lib/text/hyphen/language/1.9/fi.rb
142
+ * lib/text/hyphen/language/1.9/fr.rb
143
+ * lib/text/hyphen/language/1.9/ga.rb
144
+ * lib/text/hyphen/language/1.9/hr.rb
145
+ * lib/text/hyphen/language/1.9/hsb.rb
146
+ * lib/text/hyphen/language/1.9/hu1.rb
147
+ * lib/text/hyphen/language/1.9/hu2.rb
148
+ * lib/text/hyphen/language/1.9/ia.rb
149
+ * lib/text/hyphen/language/1.9/id.rb
150
+ * lib/text/hyphen/language/1.9/is.rb
151
+ * lib/text/hyphen/language/1.9/it.rb
152
+ * lib/text/hyphen/language/1.9/la.rb
153
+ * lib/text/hyphen/language/1.9/mn.rb
154
+ * lib/text/hyphen/language/1.9/nl.rb
155
+ * lib/text/hyphen/language/1.9/no1.rb
156
+ * lib/text/hyphen/language/1.9/no2.rb
157
+ * lib/text/hyphen/language/1.9/pl.rb
158
+ * lib/text/hyphen/language/1.9/pt.rb
159
+ * lib/text/hyphen/language/1.9/sv.rb
@@ -1,14 +1,71 @@
1
1
  .autotest
2
- COPYING.txt
3
- History.txt
4
- LICENCE.txt
2
+ History.rdoc
3
+ License.rdoc
5
4
  Manifest.txt
6
- README.txt
5
+ README.rdoc
7
6
  Rakefile
8
- bin/hyphen
7
+ bin/ruby-hyphen
9
8
  lib/text-hyphen.rb
10
9
  lib/text/hyphen.rb
11
10
  lib/text/hyphen/language.rb
11
+ lib/text/hyphen/language/1.8/ca.rb
12
+ lib/text/hyphen/language/1.8/cs.rb
13
+ lib/text/hyphen/language/1.8/da.rb
14
+ lib/text/hyphen/language/1.8/de1.rb
15
+ lib/text/hyphen/language/1.8/de2.rb
16
+ lib/text/hyphen/language/1.8/en_uk.rb
17
+ lib/text/hyphen/language/1.8/en_us.rb
18
+ lib/text/hyphen/language/1.8/es.rb
19
+ lib/text/hyphen/language/1.8/et.rb
20
+ lib/text/hyphen/language/1.8/eu.rb
21
+ lib/text/hyphen/language/1.8/fi.rb
22
+ lib/text/hyphen/language/1.8/fr.rb
23
+ lib/text/hyphen/language/1.8/ga.rb
24
+ lib/text/hyphen/language/1.8/hr.rb
25
+ lib/text/hyphen/language/1.8/hsb.rb
26
+ lib/text/hyphen/language/1.8/hu1.rb
27
+ lib/text/hyphen/language/1.8/hu2.rb
28
+ lib/text/hyphen/language/1.8/ia.rb
29
+ lib/text/hyphen/language/1.8/id.rb
30
+ lib/text/hyphen/language/1.8/is.rb
31
+ lib/text/hyphen/language/1.8/it.rb
32
+ lib/text/hyphen/language/1.8/la.rb
33
+ lib/text/hyphen/language/1.8/mn.rb
34
+ lib/text/hyphen/language/1.8/nl.rb
35
+ lib/text/hyphen/language/1.8/no1.rb
36
+ lib/text/hyphen/language/1.8/no2.rb
37
+ lib/text/hyphen/language/1.8/pl.rb
38
+ lib/text/hyphen/language/1.8/pt.rb
39
+ lib/text/hyphen/language/1.8/sv.rb
40
+ lib/text/hyphen/language/1.9/ca.rb
41
+ lib/text/hyphen/language/1.9/cs.rb
42
+ lib/text/hyphen/language/1.9/da.rb
43
+ lib/text/hyphen/language/1.9/de1.rb
44
+ lib/text/hyphen/language/1.9/de2.rb
45
+ lib/text/hyphen/language/1.9/en_uk.rb
46
+ lib/text/hyphen/language/1.9/en_us.rb
47
+ lib/text/hyphen/language/1.9/es.rb
48
+ lib/text/hyphen/language/1.9/et.rb
49
+ lib/text/hyphen/language/1.9/eu.rb
50
+ lib/text/hyphen/language/1.9/fi.rb
51
+ lib/text/hyphen/language/1.9/fr.rb
52
+ lib/text/hyphen/language/1.9/ga.rb
53
+ lib/text/hyphen/language/1.9/hr.rb
54
+ lib/text/hyphen/language/1.9/hsb.rb
55
+ lib/text/hyphen/language/1.9/hu1.rb
56
+ lib/text/hyphen/language/1.9/hu2.rb
57
+ lib/text/hyphen/language/1.9/ia.rb
58
+ lib/text/hyphen/language/1.9/id.rb
59
+ lib/text/hyphen/language/1.9/is.rb
60
+ lib/text/hyphen/language/1.9/it.rb
61
+ lib/text/hyphen/language/1.9/la.rb
62
+ lib/text/hyphen/language/1.9/mn.rb
63
+ lib/text/hyphen/language/1.9/nl.rb
64
+ lib/text/hyphen/language/1.9/no1.rb
65
+ lib/text/hyphen/language/1.9/no2.rb
66
+ lib/text/hyphen/language/1.9/pl.rb
67
+ lib/text/hyphen/language/1.9/pt.rb
68
+ lib/text/hyphen/language/1.9/sv.rb
12
69
  lib/text/hyphen/language/ca.rb
13
70
  lib/text/hyphen/language/cs.rb
14
71
  lib/text/hyphen/language/da.rb
@@ -25,6 +82,7 @@ lib/text/hyphen/language/fr.rb
25
82
  lib/text/hyphen/language/ga.rb
26
83
  lib/text/hyphen/language/hr.rb
27
84
  lib/text/hyphen/language/hsb.rb
85
+ lib/text/hyphen/language/hu.rb
28
86
  lib/text/hyphen/language/hu1.rb
29
87
  lib/text/hyphen/language/hu2.rb
30
88
  lib/text/hyphen/language/ia.rb
@@ -33,12 +91,16 @@ lib/text/hyphen/language/is.rb
33
91
  lib/text/hyphen/language/it.rb
34
92
  lib/text/hyphen/language/la.rb
35
93
  lib/text/hyphen/language/mn.rb
94
+ lib/text/hyphen/language/ms.rb
36
95
  lib/text/hyphen/language/nl.rb
96
+ lib/text/hyphen/language/no.rb
37
97
  lib/text/hyphen/language/no1.rb
38
98
  lib/text/hyphen/language/no2.rb
39
99
  lib/text/hyphen/language/pl.rb
40
100
  lib/text/hyphen/language/pt.rb
41
101
  lib/text/hyphen/language/sv.rb
102
+ test/data/bug_9807_latin1.rb
103
+ test/data/bug_9807_utf-8.rb
42
104
  test/test_bugs.rb
43
105
  test/test_text_hyphen.rb
44
106
  text-hyphen.gemspec
@@ -0,0 +1,69 @@
1
+ = text-hyphen
2
+
3
+ == Description
4
+
5
+ Text::Hyphen is a Ruby library to hyphenate words in various languages using
6
+ Ruby-fied versions of TeX hyphenation patterns. It will properly hyphenate
7
+ various words according to the rules of the language the word is written in.
8
+ The algorithm is based on that of the TeX typesetting system by Donald E.
9
+ Knuth.
10
+
11
+ This is originally based on the Perl implementation of
12
+ {TeX::Hyphen}[http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
13
+ and the {Ruby port}[http://rubyforge.org/projects/text-format]. The language
14
+ hyphenation pattern files are based on the sources available from
15
+ {CTAN}[http://www.ctan.org] as of 2004.12.19 and have been manually translated
16
+ by Austin Ziegler.
17
+
18
+ This release is 1.2. This is a major release providing both Ruby 1.8.7 and Ruby
19
+ 1.9.2 support. This is the last major release supporting Ruby 1.8 interpreters.
20
+ Future versions will only work with Ruby 1.9 or later interpreters.
21
+
22
+ == Where
23
+
24
+ * {RubyForge}[http://rubyforge.org/projects/text-format/]
25
+ * {RubyGems}[https://rubygems.org/gems/text-hyphen]
26
+ * {GitHub}[https://github.com/halostatue/text-hyphen/]
27
+ * {RDoc}[http://rdoc.info/github/halostatue/text-hyphen/master/frames]
28
+
29
+ == Synopsis
30
+
31
+ require 'text/hyphen'
32
+ hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
33
+ # Defaults to the above
34
+ hh = Text::Hyphen.new
35
+
36
+ word = "representation"
37
+ points = hh.hyphenate(word) #=> [3, 5, 8, 10]
38
+ puts hh.visualize(word) #=> rep-re-sen-ta-tion
39
+
40
+ Text::Hyphen is truly multilingual, with 29 languages or language variants
41
+ supported. As an example, consider the difference between the following:
42
+
43
+ require 'text/hyphen'
44
+ # Using left and right minimum values of 0 ensures that you will see all
45
+ # possible hyphenation points, not just those that meet the minimum width
46
+ # requirements.
47
+ en = Text::Hyphen.new(:left => 0, :right => 0)
48
+ fr = Text::Hyphen.new(:language => "fr", :left => 0, :right => 0)
49
+
50
+ puts en.visualise("organiser") #=> or-gan-iser
51
+ puts fr.visualise("organiser") #=> or-ga-ni-ser
52
+
53
+ As you can see, the hyphenation is distinct between the two hyphenators.
54
+ Additional improvements over TeX::Hyphen include thread safety (except for
55
+ debug control) and support for UTF-8 under Ruby 1.9.
56
+
57
+ == Install
58
+ gem install text-hyphen
59
+
60
+ == Developers
61
+
62
+ After checking out the source, run:
63
+
64
+ $ rake newb
65
+
66
+ This task will install any missing dependencies, run the tests/specs,
67
+ and generate the RDoc.
68
+
69
+ :include: License.rdoc
data/Rakefile CHANGED
@@ -1,19 +1,23 @@
1
- # -*- ruby -*-
1
+ # -*- ruby encoding: utf-8 -*-
2
2
 
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
- require 'rubyforge'
6
5
 
7
6
  Hoe.plugin :doofus
8
7
  Hoe.plugin :gemspec
9
8
  Hoe.plugin :git
10
9
  Hoe.plugin :rubyforge
11
10
 
12
- Hoe.spec 'text-hyphen' do
11
+ Hoe.spec 'text-hyphen' do |spec|
13
12
  developer('Austin Ziegler', 'austin@rubyforge.org')
13
+
14
14
  self.rubyforge_name = 'text-format'
15
+ spec.remote_rdoc_dir = 'text-hyphen/rdoc'
16
+ spec.rsync_args << ' --exclude=statsvn/'
15
17
 
16
- self.spec_extras[:required_ruby_version] = '< 1.9'
18
+ spec.history_file = 'History.rdoc'
19
+ spec.readme_file = 'README.rdoc'
20
+ spec.extra_rdoc_files = FileList["*.rdoc"].to_a
17
21
 
18
22
  self.extra_dev_deps << ['hoe-doofus', '~> 1.0']
19
23
  self.extra_dev_deps << ['hoe-gemspec', '~> 1.0']
File without changes
@@ -1 +1,2 @@
1
+ # -*- ruby encoding: utf-8 -*-
1
2
  require 'text/hyphen'
@@ -1,95 +1,26 @@
1
- module Text; end
2
-
3
- # = Introduction
4
- # Text::Hyphen -- hyphenate words using modified versions of TeX hyphenation
5
- # patterns.
6
- #
7
- # == Usage
8
- # require 'text/hyphen'
9
- # hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
10
- # # Defaults to the above
11
- # hh = TeX::Hyphen.new
12
- #
13
- # word = "representation"
14
- # points = hyp.hyphenate(word) #=> [3, 5, 8, 10]
15
- # puts hyp.visualize(word) #=> rep-re-sen-ta-tion
16
- #
17
- # en = Text::Hyphen.new(:left => 0, :right => 0)
18
- # fr = Text::Hyphen.new(:language = "fr", :left => 0, :right => 0)
19
- # puts en.visualise("organiser") #=> or-gan-iser
20
- # puts fr.visualise("organiser") #=> or-ga-ni-ser
21
- #
22
- # == Description
23
- # Creates a new Hyphen object and loads the language patterns into memory.
24
- # The hyphenator can then be asked for the hyphenation of a word. If no
25
- # language is specified, then the language en_us (EN_US) is used by default.
26
- #
27
- # Copyright:: Copyright (c) 2004 - 2005 Austin Ziegler
28
- # Version:: 1.0.2
29
- # Based On:: <tt>TeX::Hyphen</tt> 0.4 Copyright (c) 2003 - 2004
30
- # Martin DeMello and Austin Ziegler, in turn based on
31
- # Perl's <tt>TeX::Hyphen</tt>
32
- # [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
33
- # Copyright (c) 1997 - 2002 Jan Pazdziora
34
- #
35
- # == Licence
36
- # Licensing for Text::Hyphen is unfortunately complex because of the various
37
- # copyrights and licences of the source hyphenation files. Some of these
38
- # files are available only under the TeX licence and others are available
39
- # only under the GNU GPL while others are public domain. Each language file
40
- # has these licences embedded within the file. Please consult each file's
41
- # licence to ensure that it is compatible with your application.
42
- #
43
- # The copyright on the Text::Hyphen application/library and the Ruby
44
- # translations of hyphenation files belongs to Austin Ziegler. All other
45
- # copyrights on original versions still stand; Text::Hyphen is a derivative
46
- # work of these and other projects.
47
- #
48
- # === Application and Compilation Licences
49
- # Text::Hyphen, the application/library is licensed under the same terms as
50
- # Ruby. Note that this specifically refers to the contents of bin/hyphen,
51
- # lib/text/hyphen.rb, and lib/text/hyphen/language.rb.
52
- #
53
- # Individual language hyphenation files are NOT licensed under these terms,
54
- # but under the following MIT-style licence and the original hyphenation
55
- # pattern licenses. The copyright for the original TeX hyphenation files is
56
- # held by the original authors; any mistakes in conversion of these files to
57
- # Ruby is attributable to the contributors to the Text::Hyphen package only.
58
- #
59
- # The compilation package Text::Hyphen is licensed under the same terms as
60
- # Ruby.
61
- #
62
- # === Blanket Language Hyphenation File Licence
63
- # Permission is hereby granted, free of charge, to any person obtaining a
64
- # copy of this software and associated documentation files (the "Software"),
65
- # to deal in the Software without restriction, including without limitation
66
- # the rights to use, copy, modify, merge, publish, distribute, sublicense,
67
- # and/or sell copies of the Software, and to permit persons to whom the
68
- # Software is furnished to do so, subject to the following conditions:
69
- #
70
- # The above copyright notice and this permission notice shall be included in
71
- # all copies or substantial portions of the Software.
72
- #
73
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
74
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
75
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
76
- # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
77
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
78
- # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
79
- # DEALINGS IN THE SOFTWARE.
1
+ # -*- ruby encoding: utf-8 -*-
2
+ module Text # :nodoc:
3
+ end
4
+
5
+ # An object that knows how to perform hyphenation based on the TeX
6
+ # hyphenation algorithm with pattern files. Each object is constructed with
7
+ # a specific language's hyphenation patterns.
80
8
  class Text::Hyphen
81
9
  DEBUG = false
82
- VERSION = '1.0.2'
10
+ VERSION = '1.2'
83
11
 
84
12
  DEFAULT_MIN_LEFT = 2
85
13
  DEFAULT_MIN_RIGHT = 2
86
14
 
87
15
  # No fewer than this number of letters will show up to the left of the
88
- # hyphen. This overrides the default specified in the language.
16
+ # hyphen. The initial value for this will be specified by the language;
17
+ # setting this value will override the language's defaults.
89
18
  attr_accessor :left
19
+
90
20
  # No fewer than this number of letters will show up to the right of the
91
21
  # hyphen. This overrides the default specified in the language.
92
22
  attr_accessor :right
23
+
93
24
  # The name of the language to be used in hyphenating words. This will be a
94
25
  # two or three character ISO 639 code, with the two character form being
95
26
  # the canonical resource name. This will load the language hyphenation
@@ -101,14 +32,15 @@ class Text::Hyphen
101
32
  # Minimal transformations will be performed on the language code provided,
102
33
  # such that any dashes are converted to underscores (e.g., 'en-us' becomes
103
34
  # 'en_us') and all characters are regularised. Resource names will be
104
- # downcased and class names will be upcased (e.g., 'Pt' for the Portuguese
105
- # language becomes 'pt' and 'PT', respectively).
35
+ # downcased and class names will be converted to uppercase (e.g., 'Pt' for
36
+ # the Portuguese language becomes 'pt' and 'PT', respectively).
106
37
  #
107
38
  # The language may also be specified as an instance of
108
39
  # Text::Hyphen::Language.
109
40
  attr_accessor :language
41
+
110
42
  undef :language=
111
- def language=(lang)
43
+ def language=(lang) #:nodoc:
112
44
  require 'text/hyphen/language' unless defined?(Text::Hyphen::Language)
113
45
  if lang.kind_of? Text::Hyphen::Language
114
46
  @iso_language = lang.to_s.split(%r{::}o)[-1].downcase
@@ -119,13 +51,27 @@ class Text::Hyphen
119
51
  end
120
52
  @iso_language
121
53
  end
54
+
122
55
  # Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
123
- attr_reader :iso_language
56
+ attr_reader :iso_language
124
57
 
125
- # The following initializations are equivalent:
58
+ # Creates a hyphenation object with the options requested. The options
59
+ # available are:
126
60
  #
127
- # hyp = TeX::Hyphenate.new(:language => "EU")
128
- # hyp = TeX::Hyphenate.new { |h| h.language = "EU" }
61
+ # language:: The language to perform hyphenation with. See #language and
62
+ # #iso_language.
63
+ # left:: The minimum number of characters to the left of a
64
+ # hyphenation point. See #left.
65
+ # right:: The minimum number of characters to the right of a
66
+ # hyphenation point. See #right.
67
+ #
68
+ # The options can be provided either as hashed parameters or set as
69
+ # methods in an initialization block. The following initializations are
70
+ # all equivalent:
71
+ #
72
+ # hyp = Text::Hyphen.new(:language => 'en_us')
73
+ # hyp = Text::Hyphen.new(language: 'en_us') # under Ruby 1.9
74
+ # hyp = Text::Hyphen.new { |h| h.language = 'en_us' }
129
75
  def initialize(options = {}) # :yields self:
130
76
  @iso_language = options[:language]
131
77
  @left = options[:left]
@@ -147,16 +93,16 @@ class Text::Hyphen
147
93
 
148
94
  load_language
149
95
 
150
- @left ||= DEFAULT_MIN_LEFT
151
- @right ||= DEFAULT_MIN_RIGHT
96
+ @left ||= DEFAULT_MIN_LEFT
97
+ @right ||= DEFAULT_MIN_RIGHT
152
98
  end
153
99
 
154
- # Returns a list of places where the word can be divided, as
100
+ # Returns an array of character positions where a word can be hyphenated.
155
101
  #
156
- # hyp.hyphenate('representation')
102
+ # hyp.hyphenate('representation') #=> [3, 5, 8, 10]
157
103
  #
158
- # returns [3, 5, 8, 10]. If the word has been hyphenated previously, it
159
- # will be returned from a per-instance cache.
104
+ # Because hyphenation can be expensive, if the word has been hyphenated
105
+ # previously, it will be returned from a per-instance cache.
160
106
  def hyphenate(word)
161
107
  word = word.downcase
162
108
  $stderr.puts "Hyphenating #{word}" if DEBUG
@@ -164,28 +110,32 @@ class Text::Hyphen
164
110
  res = @language.exceptions[word]
165
111
  return @cache[word] = make_result_list(res) if res
166
112
 
167
- result = [0] * (word.split(//).size + 1)
168
- rightstop = word.split(//).size - @right
113
+ letters = word.scan(@language.scan_re)
114
+ $stderr.puts letters.inspect if DEBUG
115
+ word_size = letters.size
116
+
117
+ result = [0] * (word_size + 1)
118
+ right_stop = word_size - @right
169
119
 
170
120
  updater = Proc.new do |hash, str, pos|
171
121
  if hash.has_key?(str)
172
122
  $stderr.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
173
- hash[str].split(//).each_with_index do |cc, ii|
123
+ hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
174
124
  cc = cc.to_i
175
125
  result[ii + pos] = cc if cc > result[ii + pos]
176
126
  end
177
- $stderr.print ": #{result}\n" if DEBUG
127
+ $stderr.print ": #{result.inspect}\n" if DEBUG
178
128
  end
179
129
  end
180
130
 
181
131
  # Walk the word
182
- (0..rightstop).each do |pos|
183
- restlength = word.length - pos
184
- (1..restlength).each do |length|
185
- substr = word[pos, length]
132
+ (0..right_stop).each do |pos|
133
+ rest_length = word_size - pos
134
+ (1..rest_length).each do |length|
135
+ substr = letters[pos, length].join('')
186
136
  updater[@language.hyphen, substr, pos]
187
137
  updater[@language.start, substr, pos] if pos.zero?
188
- updater[@language.stop, substr, pos] if (length == restlength)
138
+ updater[@language.stop, substr, pos] if (length == rest_length)
189
139
  end
190
140
  end
191
141
 
@@ -196,23 +146,23 @@ class Text::Hyphen
196
146
  @cache[word] = make_result_list(result)
197
147
  end
198
148
 
199
- # Returns a visualization of the hyphenation points, so:
149
+ # Returns a visualization of the hyphenation points.
200
150
  #
201
- # hyp.visualize('representation')
151
+ # hyp.visualize('representation') #=> rep-re-sen-ta-tion
202
152
  #
203
- # returns <tt>rep-re-sen-ta-tion</tt>, at least for English patterns. If
204
- # the word has been visualised previously, it will be returned from a
205
- # per-instance cache.
153
+ # Because hyphenation can be expensive, if the word has been visualised
154
+ # previously, it will be returned from a per-instance cache.
206
155
  def visualise(word)
207
156
  return @vcache[word] if @vcache.has_key?(word)
208
157
  w = word.dup
209
- hyphenate(w).each_with_index do |pos, n|
158
+ hyphenate(w).each_with_index do |pos, n|
210
159
  w[pos.to_i + n, 0] = '-' if pos != 0
211
160
  end
212
161
  @vcache[word] = w
213
162
  end
214
163
  alias visualize visualise
215
164
 
165
+ # Clears the per-instance hyphenation and visualization caches.
216
166
  def clear_cache!
217
167
  @cache.clear
218
168
  @vcache.clear
@@ -229,7 +179,8 @@ class Text::Hyphen
229
179
  end
230
180
  end
231
181
 
232
- # Returns statistics
182
+ # Returns a string describing the structure of the patterns for the
183
+ # language of this hyphenation object.
233
184
  def stats
234
185
  _b = @language.both.size
235
186
  _s = @language.start.size
@@ -254,7 +205,7 @@ EOS
254
205
  def updateresult(hash, str, pos)
255
206
  if hash.has_key?(str)
256
207
  STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
257
- hash[str].split('').each_with_index do |c, i|
208
+ hash[str].scan(@language.scan_re).each_with_index do |c, i|
258
209
  c = c.to_i
259
210
  @result[i + pos] = c if c > @result[i + pos]
260
211
  end
@@ -287,4 +238,16 @@ EOS
287
238
  @iso_language
288
239
  end
289
240
  private :load_language
241
+
242
+ # Resolves a file for cleaner loading from a hyphenation loader file.
243
+ def self.require_real_hyphenation_file(loader) # :nodoc:
244
+ p = File.dirname(loader)
245
+ f = File.basename(loader)
246
+ v = if RUBY_VERSION < "1.9.1"
247
+ "1.8"
248
+ else
249
+ "1.9"
250
+ end
251
+ require File.join(p, v, f)
252
+ end
290
253
  end