text-hyphen 1.0.2 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/.gemtest +0 -0
  2. data/History.rdoc +54 -0
  3. data/License.rdoc +159 -0
  4. data/Manifest.txt +67 -5
  5. data/README.rdoc +69 -0
  6. data/Rakefile +8 -4
  7. data/bin/{hyphen → ruby-hyphen} +0 -0
  8. data/lib/text-hyphen.rb +1 -0
  9. data/lib/text/hyphen.rb +74 -111
  10. data/lib/text/hyphen/language.rb +90 -26
  11. data/lib/text/hyphen/language/1.8/ca.rb +171 -0
  12. data/lib/text/hyphen/language/1.8/cs.rb +360 -0
  13. data/lib/text/hyphen/language/1.8/da.rb +117 -0
  14. data/lib/text/hyphen/language/1.8/de1.rb +718 -0
  15. data/lib/text/hyphen/language/1.8/de2.rb +680 -0
  16. data/lib/text/hyphen/language/1.8/en_uk.rb +789 -0
  17. data/lib/text/hyphen/language/1.8/en_us.rb +490 -0
  18. data/lib/text/hyphen/language/1.8/es.rb +287 -0
  19. data/lib/text/hyphen/language/1.8/et.rb +335 -0
  20. data/lib/text/hyphen/language/1.8/eu.rb +112 -0
  21. data/lib/text/hyphen/language/1.8/fi.rb +112 -0
  22. data/lib/text/hyphen/language/1.8/fr.rb +389 -0
  23. data/lib/text/hyphen/language/1.8/ga.rb +606 -0
  24. data/lib/text/hyphen/language/1.8/hr.rb +122 -0
  25. data/lib/text/hyphen/language/1.8/hsb.rb +179 -0
  26. data/lib/text/hyphen/language/1.8/hu1.rb +380 -0
  27. data/lib/text/hyphen/language/1.8/hu2.rb +1278 -0
  28. data/lib/text/hyphen/language/1.8/ia.rb +71 -0
  29. data/lib/text/hyphen/language/1.8/id.rb +91 -0
  30. data/lib/text/hyphen/language/1.8/is.rb +387 -0
  31. data/lib/text/hyphen/language/1.8/it.rb +133 -0
  32. data/lib/text/hyphen/language/1.8/la.rb +132 -0
  33. data/lib/text/hyphen/language/1.8/mn.rb +101 -0
  34. data/lib/text/hyphen/language/1.8/nl.rb +1250 -0
  35. data/lib/text/hyphen/language/1.8/no1.rb +299 -0
  36. data/lib/text/hyphen/language/1.8/no2.rb +134 -0
  37. data/lib/text/hyphen/language/1.8/pl.rb +478 -0
  38. data/lib/text/hyphen/language/1.8/pt.rb +54 -0
  39. data/lib/text/hyphen/language/1.8/sv.rb +447 -0
  40. data/lib/text/hyphen/language/1.9/ca.rb +174 -0
  41. data/lib/text/hyphen/language/1.9/cs.rb +361 -0
  42. data/lib/text/hyphen/language/1.9/da.rb +117 -0
  43. data/lib/text/hyphen/language/1.9/de1.rb +719 -0
  44. data/lib/text/hyphen/language/1.9/de2.rb +682 -0
  45. data/lib/text/hyphen/language/1.9/en_uk.rb +791 -0
  46. data/lib/text/hyphen/language/1.9/en_us.rb +492 -0
  47. data/lib/text/hyphen/language/1.9/es.rb +289 -0
  48. data/lib/text/hyphen/language/1.9/et.rb +336 -0
  49. data/lib/text/hyphen/language/1.9/eu.rb +114 -0
  50. data/lib/text/hyphen/language/1.9/fi.rb +113 -0
  51. data/lib/text/hyphen/language/1.9/fr.rb +391 -0
  52. data/lib/text/hyphen/language/1.9/ga.rb +608 -0
  53. data/lib/text/hyphen/language/1.9/hr.rb +123 -0
  54. data/lib/text/hyphen/language/1.9/hsb.rb +180 -0
  55. data/lib/text/hyphen/language/1.9/hu1.rb +382 -0
  56. data/lib/text/hyphen/language/1.9/hu2.rb +1280 -0
  57. data/lib/text/hyphen/language/1.9/ia.rb +73 -0
  58. data/lib/text/hyphen/language/1.9/id.rb +93 -0
  59. data/lib/text/hyphen/language/1.9/is.rb +388 -0
  60. data/lib/text/hyphen/language/1.9/it.rb +134 -0
  61. data/lib/text/hyphen/language/1.9/la.rb +134 -0
  62. data/lib/text/hyphen/language/1.9/mn.rb +102 -0
  63. data/lib/text/hyphen/language/1.9/nl.rb +1252 -0
  64. data/lib/text/hyphen/language/1.9/no1.rb +301 -0
  65. data/lib/text/hyphen/language/1.9/no2.rb +136 -0
  66. data/lib/text/hyphen/language/1.9/pl.rb +479 -0
  67. data/lib/text/hyphen/language/1.9/pt.rb +55 -0
  68. data/lib/text/hyphen/language/1.9/sv.rb +449 -0
  69. data/lib/text/hyphen/language/ca.rb +3 -173
  70. data/lib/text/hyphen/language/cs.rb +3 -362
  71. data/lib/text/hyphen/language/da.rb +3 -117
  72. data/lib/text/hyphen/language/de.rb +1 -0
  73. data/lib/text/hyphen/language/de1.rb +3 -724
  74. data/lib/text/hyphen/language/de2.rb +3 -685
  75. data/lib/text/hyphen/language/en_uk.rb +3 -790
  76. data/lib/text/hyphen/language/en_us.rb +3 -492
  77. data/lib/text/hyphen/language/es.rb +3 -288
  78. data/lib/text/hyphen/language/et.rb +3 -336
  79. data/lib/text/hyphen/language/eu.rb +3 -114
  80. data/lib/text/hyphen/language/fi.rb +3 -112
  81. data/lib/text/hyphen/language/fr.rb +3 -391
  82. data/lib/text/hyphen/language/ga.rb +3 -607
  83. data/lib/text/hyphen/language/hr.rb +3 -123
  84. data/lib/text/hyphen/language/hsb.rb +2 -179
  85. data/lib/text/hyphen/language/hu.rb +1 -0
  86. data/lib/text/hyphen/language/hu1.rb +3 -384
  87. data/lib/text/hyphen/language/hu2.rb +3 -1282
  88. data/lib/text/hyphen/language/ia.rb +3 -72
  89. data/lib/text/hyphen/language/id.rb +3 -96
  90. data/lib/text/hyphen/language/is.rb +3 -389
  91. data/lib/text/hyphen/language/it.rb +3 -134
  92. data/lib/text/hyphen/language/la.rb +3 -133
  93. data/lib/text/hyphen/language/mn.rb +3 -102
  94. data/lib/text/hyphen/language/ms.rb +9 -0
  95. data/lib/text/hyphen/language/nl.rb +3 -1252
  96. data/lib/text/hyphen/language/no.rb +1 -0
  97. data/lib/text/hyphen/language/no1.rb +3 -302
  98. data/lib/text/hyphen/language/no2.rb +3 -137
  99. data/lib/text/hyphen/language/pl.rb +3 -479
  100. data/lib/text/hyphen/language/pt.rb +3 -55
  101. data/lib/text/hyphen/language/sv.rb +3 -448
  102. data/test/data/bug_9807_latin1.rb +10 -0
  103. data/test/data/bug_9807_utf-8.rb +10 -0
  104. data/test/test_bugs.rb +14 -4
  105. data/test/test_text_hyphen.rb +3 -3
  106. data/text-hyphen.gemspec +29 -29
  107. metadata +101 -40
  108. data/COPYING.txt +0 -339
  109. data/History.txt +0 -23
  110. data/LICENCE.txt +0 -47
  111. data/README.txt +0 -82
File without changes
@@ -0,0 +1,54 @@
1
+ == 1.2 / 2011.07.17
2
+ * Major Enhancements:
3
+ * This release supports Ruby 1.9 with UTF-8 encodings. The language files are
4
+ duplicated for both Ruby 1.8 and 1.9 and the correct version is loaded
5
+ based on RUBY\_VERSION.
6
+ * Minor Enhancements:
7
+ * Making Hungarian and Norwegian language files act like the German language
8
+ files (both of these languages have two alternative hyphenation tables).
9
+ * Added a Malay language file that should work correctly.
10
+ * Cleaned up the documentation.
11
+ * Bug Fixes:
12
+ * Fixed 9807 and 28128 (previously noted as not reproducible; quality
13
+ reproduction cases were found).
14
+ * Changes:
15
+ * Relicensing the core library to the MIT license and attempting to clarify
16
+ the license situation.
17
+ * Renaming hyphen to ruby-hyphen.
18
+ * This is the final release compatible with Ruby 1.8, first release
19
+ compatible with Ruby 1.9.
20
+ * Test Coverage:
21
+ * Tested with all major Ruby releases except IronRuby.
22
+ * jruby-1.6.3 (1.8.7): passes all tests.
23
+ * jruby-1.6.3 (1.9.2): fails one UTF-8 related test (see JRUBY-5927).
24
+ * macruby-0.10 (1.9.2): passes all tests. Segfault with "rake test" (see
25
+ https://www.macruby.org/trac/ticket/1362 for more information).
26
+ * maglev-ruby-0.9 (1.8.7): passes all tests.
27
+ * rubinius-1.2.5 (1.8.7): passes all tests.
28
+ * ree-1.8.7-2011.03 (1.8.7): passes all tests.
29
+ * MRI-1.8.7-p352 (1.8.7): passes all tests.
30
+ * MRI-1.9.2-p290 (1.9.2): passes all tests.
31
+
32
+ == 1.0.2 / 2011.02.09
33
+ * Moved to 'hoe' and GitHub.
34
+ * Preparing for 2.0 which will be Ruby 1.9-only for UTF-8.
35
+ * Fixing German support (RubyForge 28498):
36
+ * Choosing 'de' as a language will load 'de1'. Choosing 'de1' or 'de2' will
37
+ load properly now, but they will be reported with an ISO language code of
38
+ 'de' (new optional #isocode attribute on a language definition that will
39
+ override the #iso_language setting of a Text::Hyphen instance if set).
40
+ * Both 'de1' and 'de2' can be loaded simultaneously now, but the first one
41
+ loaded will claim the Text::Hyphen::Language::DE constant.
42
+ * Added test cases for bugs:
43
+ * RubyForge 9807 (cannot reproduce)
44
+ * RubyForge 28128 (cannot reproduce)
45
+ * RubyForge 28498
46
+
47
+ == 1.0.1
48
+ * Minor modification to the RubyGem release of Text::Hyphen to enable the
49
+ hyphen command-line program.
50
+
51
+ == 1.0.0
52
+ * Initial version based on TeX::Hyphen 0.4.0 (some changes have been
53
+ backported to TeX::Hyphen 0.5.0).
54
+ * Incorporated many hyphenation pattern files from CTAN.
@@ -0,0 +1,159 @@
1
+ == License
2
+
3
+ Licensing for Text::Hyphen is unfortunately complex because of the various
4
+ copyrights and licenses of the source hyphenation files that have been
5
+ converted to Ruby format. Some of these files are available only under the TeX
6
+ license and others are available only under the GNU GPL while others are public
7
+ domain. Each language file has these licenses embedded within the file. Please
8
+ consult each file's license to ensure that it is compatible with your
9
+ application.
10
+
11
+ The Text::Hyphen library software, the application ruby-hyphen, and the library
12
+ (gem) as a compilation are licensed under the terms of the MIT license. The
13
+ files in this distribution covered by this license are in the list below called
14
+ "Library Files".
15
+
16
+ Individual language hyphenation files (in the list called "Language Files") are
17
+ maintained under the license described in the language file itself; the
18
+ copyright for these original files is held by the original authors; any
19
+ mistakes made in conversion of these files to Ruby are attributable to the
20
+ contributors of the Text::Hyphen package only. If license information is not
21
+ present in a given Language File, it should be considered under the terms of
22
+ TeX.
23
+
24
+ === Library License
25
+ * Copyright Austin Ziegler, 2004–2011.
26
+
27
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
28
+ this software and associated documentation files (the "Software"), to deal in
29
+ the Software without restriction, including without limitation the rights to
30
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
31
+ of the Software, and to permit persons to whom the Software is furnished to do
32
+ so, subject to the following conditions:
33
+
34
+ The above copyright notice and this permission notice shall be included in all
35
+ copies or substantial portions of the Software.
36
+
37
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43
+ SOFTWARE.
44
+
45
+ The copyright on the Text::Hyphen application/library and the Ruby
46
+ translations of hyphenation files belongs to Austin Ziegler. All other
47
+ copyrights on original versions still stand; Text::Hyphen is a derivative work
48
+ of these and other projects.
49
+
50
+ === Library Files
51
+ Note that while this list appears to include language files, these are "loader"
52
+ files only and do not contain the hyphenation patterns themselves.
53
+
54
+ * lib/text-hyphen.rb
55
+ * lib/text/hyphen.rb
56
+ * lib/text/hyphen/language.rb
57
+ * lib/text/hyphen/language/ca.rb
58
+ * lib/text/hyphen/language/cs.rb
59
+ * lib/text/hyphen/language/da.rb
60
+ * lib/text/hyphen/language/de.rb
61
+ * lib/text/hyphen/language/de1.rb
62
+ * lib/text/hyphen/language/de2.rb
63
+ * lib/text/hyphen/language/en_uk.rb
64
+ * lib/text/hyphen/language/en_us.rb
65
+ * lib/text/hyphen/language/es.rb
66
+ * lib/text/hyphen/language/et.rb
67
+ * lib/text/hyphen/language/eu.rb
68
+ * lib/text/hyphen/language/fi.rb
69
+ * lib/text/hyphen/language/fr.rb
70
+ * lib/text/hyphen/language/ga.rb
71
+ * lib/text/hyphen/language/hr.rb
72
+ * lib/text/hyphen/language/hsb.rb
73
+ * lib/text/hyphen/language/hu.rb
74
+ * lib/text/hyphen/language/hu1.rb
75
+ * lib/text/hyphen/language/hu2.rb
76
+ * lib/text/hyphen/language/ia.rb
77
+ * lib/text/hyphen/language/id.rb
78
+ * lib/text/hyphen/language/is.rb
79
+ * lib/text/hyphen/language/it.rb
80
+ * lib/text/hyphen/language/la.rb
81
+ * lib/text/hyphen/language/mn.rb
82
+ * lib/text/hyphen/language/ms.rb
83
+ * lib/text/hyphen/language/nl.rb
84
+ * lib/text/hyphen/language/no.rb
85
+ * lib/text/hyphen/language/no1.rb
86
+ * lib/text/hyphen/language/no2.rb
87
+ * lib/text/hyphen/language/pl.rb
88
+ * lib/text/hyphen/language/pt.rb
89
+ * lib/text/hyphen/language/sv.rb
90
+ * test/data/bug_9807_latin1.rb
91
+ * test/data/bug_9807_utf-8.rb
92
+ * test/test_bugs.rb
93
+ * test/test_text_hyphen.rb
94
+ * text-hyphen.gemspec
95
+ * History.rdoc
96
+ * License.rdoc
97
+ * README.rdoc
98
+ * Rakefile
99
+ * bin/ruby-hyphen
100
+
101
+ === Language Files
102
+ * lib/text/hyphen/language/1.8/ca.rb
103
+ * lib/text/hyphen/language/1.8/cs.rb
104
+ * lib/text/hyphen/language/1.8/da.rb
105
+ * lib/text/hyphen/language/1.8/de1.rb
106
+ * lib/text/hyphen/language/1.8/de2.rb
107
+ * lib/text/hyphen/language/1.8/en_uk.rb
108
+ * lib/text/hyphen/language/1.8/en_us.rb
109
+ * lib/text/hyphen/language/1.8/es.rb
110
+ * lib/text/hyphen/language/1.8/et.rb
111
+ * lib/text/hyphen/language/1.8/eu.rb
112
+ * lib/text/hyphen/language/1.8/fi.rb
113
+ * lib/text/hyphen/language/1.8/fr.rb
114
+ * lib/text/hyphen/language/1.8/ga.rb
115
+ * lib/text/hyphen/language/1.8/hr.rb
116
+ * lib/text/hyphen/language/1.8/hsb.rb
117
+ * lib/text/hyphen/language/1.8/hu1.rb
118
+ * lib/text/hyphen/language/1.8/hu2.rb
119
+ * lib/text/hyphen/language/1.8/ia.rb
120
+ * lib/text/hyphen/language/1.8/id.rb
121
+ * lib/text/hyphen/language/1.8/is.rb
122
+ * lib/text/hyphen/language/1.8/it.rb
123
+ * lib/text/hyphen/language/1.8/la.rb
124
+ * lib/text/hyphen/language/1.8/mn.rb
125
+ * lib/text/hyphen/language/1.8/nl.rb
126
+ * lib/text/hyphen/language/1.8/no1.rb
127
+ * lib/text/hyphen/language/1.8/no2.rb
128
+ * lib/text/hyphen/language/1.8/pl.rb
129
+ * lib/text/hyphen/language/1.8/pt.rb
130
+ * lib/text/hyphen/language/1.8/sv.rb
131
+ * lib/text/hyphen/language/1.9/ca.rb
132
+ * lib/text/hyphen/language/1.9/cs.rb
133
+ * lib/text/hyphen/language/1.9/da.rb
134
+ * lib/text/hyphen/language/1.9/de1.rb
135
+ * lib/text/hyphen/language/1.9/de2.rb
136
+ * lib/text/hyphen/language/1.9/en_uk.rb
137
+ * lib/text/hyphen/language/1.9/en_us.rb
138
+ * lib/text/hyphen/language/1.9/es.rb
139
+ * lib/text/hyphen/language/1.9/et.rb
140
+ * lib/text/hyphen/language/1.9/eu.rb
141
+ * lib/text/hyphen/language/1.9/fi.rb
142
+ * lib/text/hyphen/language/1.9/fr.rb
143
+ * lib/text/hyphen/language/1.9/ga.rb
144
+ * lib/text/hyphen/language/1.9/hr.rb
145
+ * lib/text/hyphen/language/1.9/hsb.rb
146
+ * lib/text/hyphen/language/1.9/hu1.rb
147
+ * lib/text/hyphen/language/1.9/hu2.rb
148
+ * lib/text/hyphen/language/1.9/ia.rb
149
+ * lib/text/hyphen/language/1.9/id.rb
150
+ * lib/text/hyphen/language/1.9/is.rb
151
+ * lib/text/hyphen/language/1.9/it.rb
152
+ * lib/text/hyphen/language/1.9/la.rb
153
+ * lib/text/hyphen/language/1.9/mn.rb
154
+ * lib/text/hyphen/language/1.9/nl.rb
155
+ * lib/text/hyphen/language/1.9/no1.rb
156
+ * lib/text/hyphen/language/1.9/no2.rb
157
+ * lib/text/hyphen/language/1.9/pl.rb
158
+ * lib/text/hyphen/language/1.9/pt.rb
159
+ * lib/text/hyphen/language/1.9/sv.rb
@@ -1,14 +1,71 @@
1
1
  .autotest
2
- COPYING.txt
3
- History.txt
4
- LICENCE.txt
2
+ History.rdoc
3
+ License.rdoc
5
4
  Manifest.txt
6
- README.txt
5
+ README.rdoc
7
6
  Rakefile
8
- bin/hyphen
7
+ bin/ruby-hyphen
9
8
  lib/text-hyphen.rb
10
9
  lib/text/hyphen.rb
11
10
  lib/text/hyphen/language.rb
11
+ lib/text/hyphen/language/1.8/ca.rb
12
+ lib/text/hyphen/language/1.8/cs.rb
13
+ lib/text/hyphen/language/1.8/da.rb
14
+ lib/text/hyphen/language/1.8/de1.rb
15
+ lib/text/hyphen/language/1.8/de2.rb
16
+ lib/text/hyphen/language/1.8/en_uk.rb
17
+ lib/text/hyphen/language/1.8/en_us.rb
18
+ lib/text/hyphen/language/1.8/es.rb
19
+ lib/text/hyphen/language/1.8/et.rb
20
+ lib/text/hyphen/language/1.8/eu.rb
21
+ lib/text/hyphen/language/1.8/fi.rb
22
+ lib/text/hyphen/language/1.8/fr.rb
23
+ lib/text/hyphen/language/1.8/ga.rb
24
+ lib/text/hyphen/language/1.8/hr.rb
25
+ lib/text/hyphen/language/1.8/hsb.rb
26
+ lib/text/hyphen/language/1.8/hu1.rb
27
+ lib/text/hyphen/language/1.8/hu2.rb
28
+ lib/text/hyphen/language/1.8/ia.rb
29
+ lib/text/hyphen/language/1.8/id.rb
30
+ lib/text/hyphen/language/1.8/is.rb
31
+ lib/text/hyphen/language/1.8/it.rb
32
+ lib/text/hyphen/language/1.8/la.rb
33
+ lib/text/hyphen/language/1.8/mn.rb
34
+ lib/text/hyphen/language/1.8/nl.rb
35
+ lib/text/hyphen/language/1.8/no1.rb
36
+ lib/text/hyphen/language/1.8/no2.rb
37
+ lib/text/hyphen/language/1.8/pl.rb
38
+ lib/text/hyphen/language/1.8/pt.rb
39
+ lib/text/hyphen/language/1.8/sv.rb
40
+ lib/text/hyphen/language/1.9/ca.rb
41
+ lib/text/hyphen/language/1.9/cs.rb
42
+ lib/text/hyphen/language/1.9/da.rb
43
+ lib/text/hyphen/language/1.9/de1.rb
44
+ lib/text/hyphen/language/1.9/de2.rb
45
+ lib/text/hyphen/language/1.9/en_uk.rb
46
+ lib/text/hyphen/language/1.9/en_us.rb
47
+ lib/text/hyphen/language/1.9/es.rb
48
+ lib/text/hyphen/language/1.9/et.rb
49
+ lib/text/hyphen/language/1.9/eu.rb
50
+ lib/text/hyphen/language/1.9/fi.rb
51
+ lib/text/hyphen/language/1.9/fr.rb
52
+ lib/text/hyphen/language/1.9/ga.rb
53
+ lib/text/hyphen/language/1.9/hr.rb
54
+ lib/text/hyphen/language/1.9/hsb.rb
55
+ lib/text/hyphen/language/1.9/hu1.rb
56
+ lib/text/hyphen/language/1.9/hu2.rb
57
+ lib/text/hyphen/language/1.9/ia.rb
58
+ lib/text/hyphen/language/1.9/id.rb
59
+ lib/text/hyphen/language/1.9/is.rb
60
+ lib/text/hyphen/language/1.9/it.rb
61
+ lib/text/hyphen/language/1.9/la.rb
62
+ lib/text/hyphen/language/1.9/mn.rb
63
+ lib/text/hyphen/language/1.9/nl.rb
64
+ lib/text/hyphen/language/1.9/no1.rb
65
+ lib/text/hyphen/language/1.9/no2.rb
66
+ lib/text/hyphen/language/1.9/pl.rb
67
+ lib/text/hyphen/language/1.9/pt.rb
68
+ lib/text/hyphen/language/1.9/sv.rb
12
69
  lib/text/hyphen/language/ca.rb
13
70
  lib/text/hyphen/language/cs.rb
14
71
  lib/text/hyphen/language/da.rb
@@ -25,6 +82,7 @@ lib/text/hyphen/language/fr.rb
25
82
  lib/text/hyphen/language/ga.rb
26
83
  lib/text/hyphen/language/hr.rb
27
84
  lib/text/hyphen/language/hsb.rb
85
+ lib/text/hyphen/language/hu.rb
28
86
  lib/text/hyphen/language/hu1.rb
29
87
  lib/text/hyphen/language/hu2.rb
30
88
  lib/text/hyphen/language/ia.rb
@@ -33,12 +91,16 @@ lib/text/hyphen/language/is.rb
33
91
  lib/text/hyphen/language/it.rb
34
92
  lib/text/hyphen/language/la.rb
35
93
  lib/text/hyphen/language/mn.rb
94
+ lib/text/hyphen/language/ms.rb
36
95
  lib/text/hyphen/language/nl.rb
96
+ lib/text/hyphen/language/no.rb
37
97
  lib/text/hyphen/language/no1.rb
38
98
  lib/text/hyphen/language/no2.rb
39
99
  lib/text/hyphen/language/pl.rb
40
100
  lib/text/hyphen/language/pt.rb
41
101
  lib/text/hyphen/language/sv.rb
102
+ test/data/bug_9807_latin1.rb
103
+ test/data/bug_9807_utf-8.rb
42
104
  test/test_bugs.rb
43
105
  test/test_text_hyphen.rb
44
106
  text-hyphen.gemspec
@@ -0,0 +1,69 @@
1
+ = text-hyphen
2
+
3
+ == Description
4
+
5
+ Text::Hyphen is a Ruby library to hyphenate words in various languages using
6
+ Ruby-fied versions of TeX hyphenation patterns. It will properly hyphenate
7
+ various words according to the rules of the language the word is written in.
8
+ The algorithm is based on that of the TeX typesetting system by Donald E.
9
+ Knuth.
10
+
11
+ This is originally based on the Perl implementation of
12
+ {TeX::Hyphen}[http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
13
+ and the {Ruby port}[http://rubyforge.org/projects/text-format]. The language
14
+ hyphenation pattern files are based on the sources available from
15
+ {CTAN}[http://www.ctan.org] as of 2004.12.19 and have been manually translated
16
+ by Austin Ziegler.
17
+
18
+ This release is 1.2. This is a major release providing both Ruby 1.8.7 and Ruby
19
+ 1.9.2 support. This is the last major release supporting Ruby 1.8 interpreters.
20
+ Future versions will only work with Ruby 1.9 or later interpreters.
21
+
22
+ == Where
23
+
24
+ * {RubyForge}[http://rubyforge.org/projects/text-format/]
25
+ * {RubyGems}[https://rubygems.org/gems/text-hyphen]
26
+ * {GitHub}[https://github.com/halostatue/text-hyphen/]
27
+ * {RDoc}[http://rdoc.info/github/halostatue/text-hyphen/master/frames]
28
+
29
+ == Synopsis
30
+
31
+ require 'text/hyphen'
32
+ hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
33
+ # Defaults to the above
34
+ hh = Text::Hyphen.new
35
+
36
+ word = "representation"
37
+ points = hh.hyphenate(word) #=> [3, 5, 8, 10]
38
+ puts hh.visualize(word) #=> rep-re-sen-ta-tion
39
+
40
+ Text::Hyphen is truly multilingual, with 29 languages or language variants
41
+ supported. As an example, consider the difference between the following:
42
+
43
+ require 'text/hyphen'
44
+ # Using left and right minimum values of 0 ensures that you will see all
45
+ # possible hyphenation points, not just those that meet the minimum width
46
+ # requirements.
47
+ en = Text::Hyphen.new(:left => 0, :right => 0)
48
+ fr = Text::Hyphen.new(:language => "fr", :left => 0, :right => 0)
49
+
50
+ puts en.visualise("organiser") #=> or-gan-iser
51
+ puts fr.visualise("organiser") #=> or-ga-ni-ser
52
+
53
+ As you can see, the hyphenation is distinct between the two hyphenators.
54
+ Additional improvements over TeX::Hyphen include thread safety (except for
55
+ debug control) and support for UTF-8 under Ruby 1.9.
56
+
57
+ == Install
58
+ gem install text-hyphen
59
+
60
+ == Developers
61
+
62
+ After checking out the source, run:
63
+
64
+ $ rake newb
65
+
66
+ This task will install any missing dependencies, run the tests/specs,
67
+ and generate the RDoc.
68
+
69
+ :include: License.rdoc
data/Rakefile CHANGED
@@ -1,19 +1,23 @@
1
- # -*- ruby -*-
1
+ # -*- ruby encoding: utf-8 -*-
2
2
 
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
- require 'rubyforge'
6
5
 
7
6
  Hoe.plugin :doofus
8
7
  Hoe.plugin :gemspec
9
8
  Hoe.plugin :git
10
9
  Hoe.plugin :rubyforge
11
10
 
12
- Hoe.spec 'text-hyphen' do
11
+ Hoe.spec 'text-hyphen' do |spec|
13
12
  developer('Austin Ziegler', 'austin@rubyforge.org')
13
+
14
14
  self.rubyforge_name = 'text-format'
15
+ spec.remote_rdoc_dir = 'text-hyphen/rdoc'
16
+ spec.rsync_args << ' --exclude=statsvn/'
15
17
 
16
- self.spec_extras[:required_ruby_version] = '< 1.9'
18
+ spec.history_file = 'History.rdoc'
19
+ spec.readme_file = 'README.rdoc'
20
+ spec.extra_rdoc_files = FileList["*.rdoc"].to_a
17
21
 
18
22
  self.extra_dev_deps << ['hoe-doofus', '~> 1.0']
19
23
  self.extra_dev_deps << ['hoe-gemspec', '~> 1.0']
File without changes
@@ -1 +1,2 @@
1
+ # -*- ruby encoding: utf-8 -*-
1
2
  require 'text/hyphen'
@@ -1,95 +1,26 @@
1
- module Text; end
2
-
3
- # = Introduction
4
- # Text::Hyphen -- hyphenate words using modified versions of TeX hyphenation
5
- # patterns.
6
- #
7
- # == Usage
8
- # require 'text/hyphen'
9
- # hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
10
- # # Defaults to the above
11
- # hh = TeX::Hyphen.new
12
- #
13
- # word = "representation"
14
- # points = hyp.hyphenate(word) #=> [3, 5, 8, 10]
15
- # puts hyp.visualize(word) #=> rep-re-sen-ta-tion
16
- #
17
- # en = Text::Hyphen.new(:left => 0, :right => 0)
18
- # fr = Text::Hyphen.new(:language = "fr", :left => 0, :right => 0)
19
- # puts en.visualise("organiser") #=> or-gan-iser
20
- # puts fr.visualise("organiser") #=> or-ga-ni-ser
21
- #
22
- # == Description
23
- # Creates a new Hyphen object and loads the language patterns into memory.
24
- # The hyphenator can then be asked for the hyphenation of a word. If no
25
- # language is specified, then the language en_us (EN_US) is used by default.
26
- #
27
- # Copyright:: Copyright (c) 2004 - 2005 Austin Ziegler
28
- # Version:: 1.0.2
29
- # Based On:: <tt>TeX::Hyphen</tt> 0.4 Copyright (c) 2003 - 2004
30
- # Martin DeMello and Austin Ziegler, in turn based on
31
- # Perl's <tt>TeX::Hyphen</tt>
32
- # [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
33
- # Copyright (c) 1997 - 2002 Jan Pazdziora
34
- #
35
- # == Licence
36
- # Licensing for Text::Hyphen is unfortunately complex because of the various
37
- # copyrights and licences of the source hyphenation files. Some of these
38
- # files are available only under the TeX licence and others are available
39
- # only under the GNU GPL while others are public domain. Each language file
40
- # has these licences embedded within the file. Please consult each file's
41
- # licence to ensure that it is compatible with your application.
42
- #
43
- # The copyright on the Text::Hyphen application/library and the Ruby
44
- # translations of hyphenation files belongs to Austin Ziegler. All other
45
- # copyrights on original versions still stand; Text::Hyphen is a derivative
46
- # work of these and other projects.
47
- #
48
- # === Application and Compilation Licences
49
- # Text::Hyphen, the application/library is licensed under the same terms as
50
- # Ruby. Note that this specifically refers to the contents of bin/hyphen,
51
- # lib/text/hyphen.rb, and lib/text/hyphen/language.rb.
52
- #
53
- # Individual language hyphenation files are NOT licensed under these terms,
54
- # but under the following MIT-style licence and the original hyphenation
55
- # pattern licenses. The copyright for the original TeX hyphenation files is
56
- # held by the original authors; any mistakes in conversion of these files to
57
- # Ruby is attributable to the contributors to the Text::Hyphen package only.
58
- #
59
- # The compilation package Text::Hyphen is licensed under the same terms as
60
- # Ruby.
61
- #
62
- # === Blanket Language Hyphenation File Licence
63
- # Permission is hereby granted, free of charge, to any person obtaining a
64
- # copy of this software and associated documentation files (the "Software"),
65
- # to deal in the Software without restriction, including without limitation
66
- # the rights to use, copy, modify, merge, publish, distribute, sublicense,
67
- # and/or sell copies of the Software, and to permit persons to whom the
68
- # Software is furnished to do so, subject to the following conditions:
69
- #
70
- # The above copyright notice and this permission notice shall be included in
71
- # all copies or substantial portions of the Software.
72
- #
73
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
74
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
75
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
76
- # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
77
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
78
- # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
79
- # DEALINGS IN THE SOFTWARE.
1
+ # -*- ruby encoding: utf-8 -*-
2
+ module Text # :nodoc:
3
+ end
4
+
5
+ # An object that knows how to perform hyphenation based on the TeX
6
+ # hyphenation algorithm with pattern files. Each object is constructed with
7
+ # a specific language's hyphenation patterns.
80
8
  class Text::Hyphen
81
9
  DEBUG = false
82
- VERSION = '1.0.2'
10
+ VERSION = '1.2'
83
11
 
84
12
  DEFAULT_MIN_LEFT = 2
85
13
  DEFAULT_MIN_RIGHT = 2
86
14
 
87
15
  # No fewer than this number of letters will show up to the left of the
88
- # hyphen. This overrides the default specified in the language.
16
+ # hyphen. The initial value for this will be specified by the language;
17
+ # setting this value will override the language's defaults.
89
18
  attr_accessor :left
19
+
90
20
  # No fewer than this number of letters will show up to the right of the
91
21
  # hyphen. This overrides the default specified in the language.
92
22
  attr_accessor :right
23
+
93
24
  # The name of the language to be used in hyphenating words. This will be a
94
25
  # two or three character ISO 639 code, with the two character form being
95
26
  # the canonical resource name. This will load the language hyphenation
@@ -101,14 +32,15 @@ class Text::Hyphen
101
32
  # Minimal transformations will be performed on the language code provided,
102
33
  # such that any dashes are converted to underscores (e.g., 'en-us' becomes
103
34
  # 'en_us') and all characters are regularised. Resource names will be
104
- # downcased and class names will be upcased (e.g., 'Pt' for the Portuguese
105
- # language becomes 'pt' and 'PT', respectively).
35
+ # downcased and class names will be converted to uppercase (e.g., 'Pt' for
36
+ # the Portuguese language becomes 'pt' and 'PT', respectively).
106
37
  #
107
38
  # The language may also be specified as an instance of
108
39
  # Text::Hyphen::Language.
109
40
  attr_accessor :language
41
+
110
42
  undef :language=
111
- def language=(lang)
43
+ def language=(lang) #:nodoc:
112
44
  require 'text/hyphen/language' unless defined?(Text::Hyphen::Language)
113
45
  if lang.kind_of? Text::Hyphen::Language
114
46
  @iso_language = lang.to_s.split(%r{::}o)[-1].downcase
@@ -119,13 +51,27 @@ class Text::Hyphen
119
51
  end
120
52
  @iso_language
121
53
  end
54
+
122
55
  # Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
123
- attr_reader :iso_language
56
+ attr_reader :iso_language
124
57
 
125
- # The following initializations are equivalent:
58
+ # Creates a hyphenation object with the options requested. The options
59
+ # available are:
126
60
  #
127
- # hyp = TeX::Hyphenate.new(:language => "EU")
128
- # hyp = TeX::Hyphenate.new { |h| h.language = "EU" }
61
+ # language:: The language to perform hyphenation with. See #language and
62
+ # #iso_language.
63
+ # left:: The minimum number of characters to the left of a
64
+ # hyphenation point. See #left.
65
+ # right:: The minimum number of characters to the right of a
66
+ # hyphenation point. See #right.
67
+ #
68
+ # The options can be provided either as hashed parameters or set as
69
+ # methods in an initialization block. The following initializations are
70
+ # all equivalent:
71
+ #
72
+ # hyp = Text::Hyphen.new(:language => 'en_us')
73
+ # hyp = Text::Hyphen.new(language: 'en_us') # under Ruby 1.9
74
+ # hyp = Text::Hyphen.new { |h| h.language = 'en_us' }
129
75
  def initialize(options = {}) # :yields self:
130
76
  @iso_language = options[:language]
131
77
  @left = options[:left]
@@ -147,16 +93,16 @@ class Text::Hyphen
147
93
 
148
94
  load_language
149
95
 
150
- @left ||= DEFAULT_MIN_LEFT
151
- @right ||= DEFAULT_MIN_RIGHT
96
+ @left ||= DEFAULT_MIN_LEFT
97
+ @right ||= DEFAULT_MIN_RIGHT
152
98
  end
153
99
 
154
- # Returns a list of places where the word can be divided, as
100
+ # Returns an array of character positions where a word can be hyphenated.
155
101
  #
156
- # hyp.hyphenate('representation')
102
+ # hyp.hyphenate('representation') #=> [3, 5, 8, 10]
157
103
  #
158
- # returns [3, 5, 8, 10]. If the word has been hyphenated previously, it
159
- # will be returned from a per-instance cache.
104
+ # Because hyphenation can be expensive, if the word has been hyphenated
105
+ # previously, it will be returned from a per-instance cache.
160
106
  def hyphenate(word)
161
107
  word = word.downcase
162
108
  $stderr.puts "Hyphenating #{word}" if DEBUG
@@ -164,28 +110,32 @@ class Text::Hyphen
164
110
  res = @language.exceptions[word]
165
111
  return @cache[word] = make_result_list(res) if res
166
112
 
167
- result = [0] * (word.split(//).size + 1)
168
- rightstop = word.split(//).size - @right
113
+ letters = word.scan(@language.scan_re)
114
+ $stderr.puts letters.inspect if DEBUG
115
+ word_size = letters.size
116
+
117
+ result = [0] * (word_size + 1)
118
+ right_stop = word_size - @right
169
119
 
170
120
  updater = Proc.new do |hash, str, pos|
171
121
  if hash.has_key?(str)
172
122
  $stderr.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
173
- hash[str].split(//).each_with_index do |cc, ii|
123
+ hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
174
124
  cc = cc.to_i
175
125
  result[ii + pos] = cc if cc > result[ii + pos]
176
126
  end
177
- $stderr.print ": #{result}\n" if DEBUG
127
+ $stderr.print ": #{result.inspect}\n" if DEBUG
178
128
  end
179
129
  end
180
130
 
181
131
  # Walk the word
182
- (0..rightstop).each do |pos|
183
- restlength = word.length - pos
184
- (1..restlength).each do |length|
185
- substr = word[pos, length]
132
+ (0..right_stop).each do |pos|
133
+ rest_length = word_size - pos
134
+ (1..rest_length).each do |length|
135
+ substr = letters[pos, length].join('')
186
136
  updater[@language.hyphen, substr, pos]
187
137
  updater[@language.start, substr, pos] if pos.zero?
188
- updater[@language.stop, substr, pos] if (length == restlength)
138
+ updater[@language.stop, substr, pos] if (length == rest_length)
189
139
  end
190
140
  end
191
141
 
@@ -196,23 +146,23 @@ class Text::Hyphen
196
146
  @cache[word] = make_result_list(result)
197
147
  end
198
148
 
199
- # Returns a visualization of the hyphenation points, so:
149
+ # Returns a visualization of the hyphenation points.
200
150
  #
201
- # hyp.visualize('representation')
151
+ # hyp.visualize('representation') #=> rep-re-sen-ta-tion
202
152
  #
203
- # returns <tt>rep-re-sen-ta-tion</tt>, at least for English patterns. If
204
- # the word has been visualised previously, it will be returned from a
205
- # per-instance cache.
153
+ # Because hyphenation can be expensive, if the word has been visualised
154
+ # previously, it will be returned from a per-instance cache.
206
155
  def visualise(word)
207
156
  return @vcache[word] if @vcache.has_key?(word)
208
157
  w = word.dup
209
- hyphenate(w).each_with_index do |pos, n|
158
+ hyphenate(w).each_with_index do |pos, n|
210
159
  w[pos.to_i + n, 0] = '-' if pos != 0
211
160
  end
212
161
  @vcache[word] = w
213
162
  end
214
163
  alias visualize visualise
215
164
 
165
+ # Clears the per-instance hyphenation and visualization caches.
216
166
  def clear_cache!
217
167
  @cache.clear
218
168
  @vcache.clear
@@ -229,7 +179,8 @@ class Text::Hyphen
229
179
  end
230
180
  end
231
181
 
232
- # Returns statistics
182
+ # Returns a string describing the structure of the patterns for the
183
+ # language of this hyphenation object.
233
184
  def stats
234
185
  _b = @language.both.size
235
186
  _s = @language.start.size
@@ -254,7 +205,7 @@ EOS
254
205
  def updateresult(hash, str, pos)
255
206
  if hash.has_key?(str)
256
207
  STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
257
- hash[str].split('').each_with_index do |c, i|
208
+ hash[str].scan(@language.scan_re).each_with_index do |c, i|
258
209
  c = c.to_i
259
210
  @result[i + pos] = c if c > @result[i + pos]
260
211
  end
@@ -287,4 +238,16 @@ EOS
287
238
  @iso_language
288
239
  end
289
240
  private :load_language
241
+
242
+ # Resolves a file for cleaner loading from a hyphenation loader file.
243
+ def self.require_real_hyphenation_file(loader) # :nodoc:
244
+ p = File.dirname(loader)
245
+ f = File.basename(loader)
246
+ v = if RUBY_VERSION < "1.9.1"
247
+ "1.8"
248
+ else
249
+ "1.9"
250
+ end
251
+ require File.join(p, v, f)
252
+ end
290
253
  end