hpricot 0.6-jruby

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +211 -0
  5. data/ext/hpricot_scan/HpricotScanService.java +1340 -0
  6. data/ext/hpricot_scan/extconf.rb +6 -0
  7. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  8. data/ext/hpricot_scan/hpricot_scan.c +5976 -0
  9. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  10. data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
  11. data/ext/hpricot_scan/hpricot_scan.rl +273 -0
  12. data/extras/mingw-rbconfig.rb +176 -0
  13. data/lib/hpricot.rb +26 -0
  14. data/lib/hpricot/blankslate.rb +63 -0
  15. data/lib/hpricot/builder.rb +200 -0
  16. data/lib/hpricot/elements.rb +510 -0
  17. data/lib/hpricot/htmlinfo.rb +672 -0
  18. data/lib/hpricot/inspect.rb +107 -0
  19. data/lib/hpricot/modules.rb +37 -0
  20. data/lib/hpricot/parse.rb +297 -0
  21. data/lib/hpricot/tag.rb +228 -0
  22. data/lib/hpricot/tags.rb +164 -0
  23. data/lib/hpricot/traverse.rb +821 -0
  24. data/lib/hpricot/xchar.rb +94 -0
  25. data/lib/i686-linux/hpricot_scan.jar +0 -0
  26. data/test/files/basic.xhtml +17 -0
  27. data/test/files/boingboing.html +2266 -0
  28. data/test/files/cy0.html +3653 -0
  29. data/test/files/immob.html +400 -0
  30. data/test/files/pace_application.html +1320 -0
  31. data/test/files/tenderlove.html +16 -0
  32. data/test/files/uswebgen.html +220 -0
  33. data/test/files/utf8.html +1054 -0
  34. data/test/files/week9.html +1723 -0
  35. data/test/files/why.xml +19 -0
  36. data/test/load_files.rb +7 -0
  37. data/test/test_alter.rb +65 -0
  38. data/test/test_builder.rb +24 -0
  39. data/test/test_parser.rb +379 -0
  40. data/test/test_paths.rb +16 -0
  41. data/test/test_preserved.rb +66 -0
  42. data/test/test_xml.rb +28 -0
  43. metadata +98 -0
@@ -0,0 +1,16 @@
1
+ <html>
2
+ <HEAD>
3
+ <meta http-equiv="Refresh" content="0; url=http://tenderlovemaking.com">
4
+ <META http-equiv="Refresh" content="0; url=http://tenderlovemaking.com">
5
+ </head>
6
+ <body>
7
+ <a href ="http://tenderlovemaking.com/">My Site!</a>
8
+ <A href ="http://whytheluckystiff.net/">Your Site!</A>
9
+ <MAP>
10
+ <area HREF="http://whytheluckystiff.net/" COORDS="1,2,3,4"></area>
11
+ <AREA HREF="http://tenderlovemaking.com/" COORDS="1,2,3,4">
12
+ </area>
13
+ <AREA HREF="http://tenderlovemaking.com/" COORDS="5,5,10,10" />
14
+ </MAP>
15
+ </body>
16
+ </html>
@@ -0,0 +1,220 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml">
4
+ <head>
5
+ <title>Free Genealogy and Family History Online - The USGenWeb Project</title>
6
+ <meta name="keywords" content="free genealogy search" />
7
+ <meta name="description" content="Free genealogy and family history online made possible by the USGenWeb Project volunteers. Search free genealogy websites for your ancestors." />
8
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
9
+ <link rel="stylesheet" type="text/css" href="usgw-layout.css" />
10
+ <link rel="stylesheet" type="text/css" href="usgw.css" />
11
+ <style type="text/css">
12
+ <!--
13
+ .pullquote {
14
+ font-family: Verdana, Arial, Helvetica, sans-serif;
15
+ font-size: 12px;
16
+ float: right;
17
+ width: 185px;
18
+ margin-top: 10px;
19
+ margin-bottom: 2px;
20
+ border-top-width: 10px;
21
+ border-bottom-width: 3px;
22
+ border-top-style: solid;
23
+ border-bottom-style: solid;
24
+ border-top-color: #38386E;
25
+ border-right-color: #38386E;
26
+ border-bottom-color: #38386E;
27
+ border-left-color: #38386E;
28
+ font-style: italic;
29
+ font-weight: normal;
30
+ border-right-width: 1px;
31
+ border-left-width: 1px;
32
+ border-right-style: solid;
33
+ border-left-style: solid;
34
+ padding: 3px;
35
+ }
36
+ .style2 {color: #003366}
37
+ -->
38
+ </style>
39
+ </head>
40
+ <body>
41
+ <!-- HEADER DIV -->
42
+ <div id="hdr">
43
+ <div align="center"><img alt="The USGenWeb Project, Free Genealogy Online" src="images/widelogo.jpg" width="740" height="150" /></div>
44
+
45
+ </div>
46
+ <!-- HEADER LINKS -->
47
+ <div id="hdr2">
48
+
49
+ <div align="center"><img src="images/navbar.gif" width="740" height="30" usemap="#Map" border="0" />
50
+ <map name="Map">
51
+ <area shape="rect" coords="46,1,126,28" href="index.shtml" alt="Home">
52
+ <area shape="rect" coords="134,1,223,28" href="about/index.shtml" alt="About Us">
53
+ <area shape="rect" coords="239,1,320,30" href="states/index.shtml" alt="States">
54
+ <area shape="rect" coords="332,1,424,28" href="projects/index.shtml" alt="Projects">
55
+ <area shape="rect" coords="444,2,555,28" href="research/index.shtml" alt="Researchers">
56
+ <area shape="rect" coords="575,0,686,28" href="volunteers/index.shtml" alt="Volunteers">
57
+ </map>
58
+ </div>
59
+
60
+ </div>
61
+ <!-- CENTER COLUMN -->
62
+ <div id="c-block">
63
+ <div id="c-col">
64
+ <p>&nbsp;</p>
65
+ <h3 align="center">Keeping Internet Genealogy Free<br />
66
+ <br />
67
+ </h3>
68
+ <div align="left">
69
+ <div>
70
+ <table>
71
+ <tr>
72
+ <td><div class="pullquote">
73
+ <p align="center"><span class="style2"><a href="states/counties.shtml">Counties of the Month</a></span><br />
74
+ <a href="http://www.rootsweb.com/~inmontgo/">Montgomery County, IN</a><br />
75
+ <a href="http://www.rootsweb.com/~flalachu/">Alachua County, FL</a><br />
76
+ <br />
77
+ <span class="style2"><a href="volunteers/FGS.shtml">Upcoming Events</a></span><br />
78
+ FGS Conference 2006<br />
79
+ <br />
80
+ </p>
81
+ </div>
82
+ <p><img src="photos/Gena-Farnham-Wallace.jpg" width="150" height="205" align="left" />
83
+ <p>Welcome to The USGenWeb Project! We are a group of volunteers working together to provide free genealogy websites for genealogical research in every county and every state of the United States. This Project is non-commercial and fully committed to free genealogy access for everyone.</p>
84
+ <p>Organization is by county and state, and this website provides you with links to all the state genealogy websites which, in turn, provide gateways to the counties. The USGenWeb Project also sponsors important Special Projects at the national level and this website provides an entry point to all of those pages, as well.</p>
85
+ <p>Clicking on a State Link (on the left) will take you to the State's website. Clicking on the tabs above will take you to additional information and links. </p>
86
+ <p>All of the volunteers who make up The USGenWeb Project are very proud of this endeavor and hope that you will find their hard work both beneficial and rewarding. Thank you for visiting!</p>
87
+ <p>The USGenWeb Project Team
88
+ </p>
89
+ <h3 align="center">10th Anniversary<br /> <br />
90
+ </h3>
91
+ <div align="left">
92
+ <p><img src="photos/oldphoto1.jpg" width="175" height="200" align="right" />2006 marks the 10th Anniversary of the USGenWeb Project and I have been looking back over those past 10 years. When the USGenWeb Project began, it was one of the few (if not the only) centralized places on the internet to find genealogy information and post a query. Those early state and county sites began with links to the small amount of on-line information of interest to a family historian and a query page. The only Special Project was the Archives. How far the Project has come during the past 10 years! Now there are several special projects and the states, counties and special projects sites of the Project not only contain links; they are filled with information and transcribed records, and more is being added every day by our wonderful, dedicated and hard working volunteers.</p>
93
+ <p>Ten years ago the internet, as we know it today, was in its infancy. The things we take for granted today--e-mail, PCs, cell phones, digital cameras, etc., were not in the average person's world. Family historians and professional genealogists not only didn't use the internet, most had never heard of it.</p>
94
+ <p>Over the past 10 years the internet has gone from obscurity to commonplace. As the internet became an everyday tool for millions of people, it changed the way family historians do research. The availability of on-line, easily accessible genealogy and historical information has fueled the phenomenal growth of Genealogy as a hobby and, I'm proud to say, the Project has been right there every step of the way. </p>
95
+ <p>Everywhere we look we see genealogy reported as the fastest growing hobby in the country. Now the internet is the first stop for beginning family historians and is used extensively by experienced researchers. New &quot;How To&quot; genealogy books devote chapters to using the internet, and it is a rare book that does not recommend The USGenWeb Project as one of the first places to visit.</p>
96
+ <p>While subscription sites have popped up everywhere on the web, The Project has continued to offer free access to its vast wealth of information. The USGenWeb Project is recognized as the premier site of free information, and the Project's websites welcome well over a million visitors each day.</p>
97
+ <p>The Project is where it is today because of the thousands of volunteers, both past and present, who cared enough to devote, collectively, millions of hours to gathering, transcribing and uploading information. </p>
98
+ <p>To each and every volunteer, past and present, a heartfelt Thank You, because you are the ones who have made The Project the fabulous resource it is today.</p>
99
+ <p>Linda Haas Davenport<br />
100
+ National Coordinator<br />
101
+ The USGenWeb Project</p>
102
+
103
+
104
+
105
+ <p></p></td>
106
+ </tr>
107
+ </table>
108
+ </div>
109
+ </p>
110
+ </div>
111
+
112
+ <br />
113
+ </div>
114
+ <!-- END CENTER COLUMN --></div>
115
+ <!-- END C-BLOCK -->
116
+ <div id="ftr">
117
+ <div align="center">
118
+
119
+ <div align="center"><img src="images/footer-bar.gif" width="740" height="30" usemap="#footerMap" border="0" /></div>
120
+ <map name="footerMap">
121
+ <area shape="rect" coords="430,6,565,25" href="http://www.usgenweb.org">
122
+ </map>
123
+
124
+ </div>
125
+ </div>
126
+ <!-- LEFT COLUMN -->
127
+ <div id="lh-col">
128
+ <span style="margin:10px 10px 10px 10px;"><br />
129
+ <a href="http://www.rootsweb.com/~algenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Alabama Genealogy">Alabama</a><br />
130
+ <a href="http://www.akgenweb.org" rel="nofollow" class="sidenavLnk" target=_blank" title="Alaska Genealogy">Alaska</a><br />
131
+ <a href="http://www.rootsweb.com/~azgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Arizona Genealogy">Arizona</a><br />
132
+ <a href="http://www.rootsweb.com/~argenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Arkansas Genealogy">Arkansas</a><br />
133
+ <a href="http://cagenweb.com/" rel="nofollow" class="sidenavLnk" target=_blank" title="California Genealogy">California</a><br />
134
+ <a href="http://www.rootsweb.com/~cogenweb/comain.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="Colorado Genealogy">Colorado</a><br />
135
+ <a href="http://www.rootsweb.com/~ctgenweb" rel="nofollow" class="sidenavLnk" target=_blank" title="Connecticut Genealogy">Connecticut</a><br />
136
+ <a href="http://www.degenweb.org/" rel="nofollow" class="sidenavLnk" target=_blank" title="Delaware Genealogy">Delaware</a><br />
137
+ <a href="http://www.rootsweb.com/~dcgenweb/dc_genweb.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="District of Columbia Genealogy">District of Columbia</a><br />
138
+ <a href="http://www.rootsweb.com/~flgenweb/index.html" rel="nofollow" class="sidenavLnk" target=_blank" title="Florida Genealogy">Florida</a><br />
139
+ <a href="http://www.rootsweb.com/~gagenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Georgia Genealogy">Georgia</a><br />
140
+ <a href="http://www.rootsweb.com/~higenweb/hawaii.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="Hawaii Genealogy">Hawaii</a><br />
141
+ <a href="http://www.rootsweb.com/~idgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Idaho Genealogy">Idaho</a><br />
142
+ <a href="http://ilgenweb.rootsweb.com/" rel="nofollow" class="sidenavLnk" target=_blank" title="Illinois Genealogy">Illinois</a><br />
143
+ <a href="http://www.ingenweb.org" rel="nofollow" class="sidenavLnk" target=_blank" title="Indiana Genealogy">Indiana</a><br />
144
+ <a href="http://IAGenWeb.org" rel="nofollow" class="sidenavLnk" target=_blank" title="Iowa Genealogy">Iowa</a><br />
145
+ <a href="http://skyways.lib.ks.us/genweb/index.html" rel="nofollow" class="sidenavLnk" target=_blank" title="Kansas Genealogy">Kansas</a><br />
146
+ <a href="http://www.kygenweb.net/index.html" rel="nofollow" class="sidenavLnk" target=_blank" title="Kentucky Genealogy">Kentucky</a><br />
147
+ <a href="http://www.lagenweb.org/" rel="nofollow" class="sidenavLnk" target=_blank" title="Louisiana Genealogy">Louisiana</a><br />
148
+ <a href="http://www.rootsweb.com/~megenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Maine Genealogy">Maine</a><br />
149
+ <a href="http://www.mdgenweb.org" rel="nofollow" class="sidenavLnk" target=_blank" title="Maryland Genealogy">Maryland</a><br />
150
+ <a href="http://www.rootsweb.com/~magenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Massachusetts Genealogy">Massachusetts</a><br />
151
+ <a href="http://www.rootsweb.com/~migenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Michigan Genealogy">Michigan</a><br />
152
+ <a href="http://www.rootsweb.com/~mngenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Minnesota Genealogy">Minnesota</a><br />
153
+ <a href="http://www.rootsweb.com/~msgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Mississippi Genealogy">Mississippi</a><br />
154
+ <a href="http://www.rootsweb.com/~mogenweb/mo.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="Missouri Genealogy">Missouri</a><br />
155
+ <a href="http://rootsweb.com/~mtgenweb" rel="nofollow" class="sidenavLnk" target=_blank" title="Montana Genealogy">Montana</a><br />
156
+ <a href="http://www.rootsweb.com/~negenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Nebraska Genealogy">Nebraska</a><br />
157
+ <a href="http://www.rootsweb.com/~nvgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Nevada Genealogy">Nevada</a><br />
158
+ <a href="http://www.usroots.com/~usgwnhus/" rel="nofollow" class="sidenavLnk" target=_blank" title="New Hampshire Genealogy">New Hampshire</a><br />
159
+ <a href="http://www.rootsweb.com/~njgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="New Jersey Genealogy">New Jersey</a><br />
160
+ <a href="http://www.rootsweb.com/~nmgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="New Mexico Genealogy">New Mexico</a><br />
161
+ <a href="http://www.rootsweb.com/~nygenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="New York Genealogy">New York</a><br />
162
+ <a href="http://www.rootsweb.com/~ncgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="North Carolina Genealogy">North Carolina</a><br />
163
+ <a href="http://www.rootsweb.com/~ndgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="North Dakota Genealogy">North Dakota</a><br />
164
+ <a href="http://www.rootsweb.com/~ohgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Ohio Genealogy">Ohio</a><br />
165
+ <a href="http://www.rootsweb.com/~okgenweb/index.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="Oklahoma Genealogy">Oklahoma</a><br />
166
+ <a href="http://www.rootsweb.com/~itgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Oklahoma-Indian Territory Genealogy">Oklahoma/Indian Territory</a><br />
167
+ <a href="http://www.rootsweb.com/~orgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Oregon Genealogy">Oregon</a><br />
168
+ <a href="http://www.pagenweb.org/" rel="nofollow" class="sidenavLnk" target=_blank" title="Pennsylvania Genealogy">Pennsylvania</a><br />
169
+ <a href="http://www.rootsweb.com/~rigenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Rhode Island Genealogy">Rhode Island</a><br />
170
+ <a href="http://sciway3.net/scgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="South Carolina Genealogy">South Carolina</a><br />
171
+ <a href="http://www.rootsweb.com/~sdgenweb" rel="nofollow" class="sidenavLnk" target=_blank" title="South Dakota Genealogy">South Dakota</a><br />
172
+ <a href="http://www.tngenweb.org/" rel="nofollow" class="sidenavLnk" target=_blank" title="Tennessee Genealogy">Tennessee</a><br />
173
+ <a href="http://www.rootsweb.com/~txgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Texas Genealogy">Texas</a><br />
174
+ <a href="http://www.rootsweb.com/~utgenweb/index.html" rel="nofollow" class="sidenavLnk" target=_blank" title="Utah Genealogy">Utah</a><br />
175
+ <a href="http://home.att.net/~Local_History/VT_History.htm" rel="nofollow" class="sidenavLnk" target=_blank" title="Vermont Genealogy">Vermont</a><br />
176
+ <a href="http://www.rootsweb.com/~vagenweb" rel="nofollow" class="sidenavLnk" target=_blank" title="Virginia Genealogy">Virginia</a><br />
177
+ <a href="http://www.rootsweb.com/~wagenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Washington Genealogy">Washington</a><br />
178
+ <a href="http://www.rootsweb.com/~wvgenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="West Virginia Genealogy">West Virginia</a><br />
179
+ <a href="http://www.rootsweb.com/~wigenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Wisconsin Genealogy">Wisconsin</a><br />
180
+ <a href="http://www.rootsweb.com/~wygenweb/" rel="nofollow" class="sidenavLnk" target=_blank" title="Wyoming Genealogy">Wyoming</a>
181
+ </span>
182
+ </div>
183
+ <!-- END LEFT COLUMN -->
184
+ <!-- RIGHT COLUMN -->
185
+ <div id="rh-col">
186
+ <br />
187
+ <span style="margin: 10px 0px 6px 6px;">
188
+ <div align="center">
189
+ <p><img alt="The USGenWeb Project, Free Genealogy Online" src="images/usgenweb100x104.gif" width="100" height="104" /></p></div></span>
190
+ <span style="margin: 10px 0px 6px 6px;">
191
+ <div align="left">
192
+ <!-- <h4>Search Engines</h4> -->
193
+ <p><a href="../states/counties.shtml" rel="nofollow" class="sidenavLnk">County Spotlight</a><br />
194
+
195
+ <p><a href="http://www.rootsweb.com/~usgenweb/newsearch.htm" rel="nofollow" class="sidenavLnk" target="_blank">&nbsp;Project Archives</a><br />
196
+ </div>
197
+ <div align="center">
198
+ <hr width="75%" size="1" noshade />
199
+ </div>
200
+ <div align="left">
201
+ <p align="left" class="sidenav">Comments and administrative-type problems should be emailed to the <a href="mailto:lhaasdav@cox.net" class="link">National Coordinator</a>.
202
+ For complaints regarding a specific web site within the USGenWeb Project, please include the URL when emailing the National Coordinator.</p>
203
+ <p align="left" class="sidenav">Direct comments or suggestions about this web site to the <a href="mailto:webmaster@usgenweb.com" class="link">Webmaster</a>. </p>
204
+ <br />
205
+ <p align="center"><a href="http://www.rootsweb.com" rel="nofollow"><img src="images/rootsweb-blue-68x85.gif" width="68" height="85" border="0" alt="Visit Rootsweb"></a></p>
206
+ </div>
207
+ <p>
208
+ <a href="index.shtml" class="sidenavLnk" title="The USGenWeb Project">Home</a><br />
209
+ <a href="about/index.shtml" class="sidenavLnk" title="About The USGenWeb Project">About Us</a><br />
210
+ <a href="projects/index.shtml" class="sidenavLnk" title="Genealogy Projects">Projects</a><br />
211
+ <a href="research/index.shtml" class="sidenavLnk" title="Help for Genealogy Research">for Researchers</a><br />
212
+ <a href="volunteers/index.shtml" class="sidenavLnk" title="USGenWeb Volunteers">for Volunteers</a><br />
213
+ <a href="sitemap.shtml" class="sidenavLnk">Site Map</a></p>
214
+ </span>
215
+
216
+
217
+ </div>
218
+ <!-- END RIGHT COLUMN -->
219
+ </body>
220
+ </html>
@@ -0,0 +1,1054 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2
+ <html><head><title>UTF-8 Sampler</title>
3
+ <META http-equiv="Content-Type" content="text/html; charset=utf-8">
4
+ </head><body bgcolor="#ffffff" text="#000000">
5
+ <h1><tt>UTF-8 SAMPLER</tt></h1>
6
+
7
+ <big><big>&nbsp;&nbsp;¥&nbsp;·&nbsp;£&nbsp;·&nbsp;€&nbsp;·&nbsp;$&nbsp;·&nbsp;¢&nbsp;·&nbsp;₡&nbsp;·&nbsp;₢&nbsp;·&nbsp;₣&nbsp;·&nbsp;₤&nbsp;·&nbsp;₥&nbsp;·&nbsp;₦&nbsp;·&nbsp;₧&nbsp;·&nbsp;₨&nbsp;·&nbsp;₩&nbsp;·&nbsp;₪&nbsp;·&nbsp;₫&nbsp;·&nbsp;₭&nbsp;·&nbsp;₮&nbsp;·&nbsp;₯</big></big>
8
+
9
+ <p>
10
+ <blockquote>
11
+ Frank da Cruz<br>
12
+ <a href="index.html">The Kermit Project - Columbia University</a><br>
13
+ New York City<br>
14
+ <a href="mailto:fdc@columbia.edu">fdc@columbia.edu</a>
15
+
16
+ <p>
17
+ <i>Last update:</i>
18
+ Wed Apr 12 16:54:07 2006
19
+ </blockquote>
20
+ <p>
21
+ <hr>
22
+ [&nbsp;<a href="http://www.columbia.edu/~fdc/pace/">PEACE</a>&nbsp;]
23
+ [&nbsp;<a href="#poetry">Poetry</a>&nbsp;]
24
+ [&nbsp;<a href="#glass">I Can Eat Glass</a>&nbsp;]
25
+ [&nbsp;<a href="#quickbrownfox">The Quick Brown Fox</a>&nbsp;]
26
+ [&nbsp;<a href="#html">HTML Features</a>&nbsp;]
27
+ [&nbsp;<a href="#credits">Credits, Tools, Commentary</a>&nbsp;]
28
+ <p>
29
+
30
+ <big><big>U</big>TF-8</big> is an ASCII-preserving encoding method for
31
+ <a href="unicode.html">Unicode</a> (ISO 10646), the Universal Character Set
32
+ (UCS). The UCS encodes most of the world's writing systems in a single
33
+ character set, allowing you to mix languages and scripts within a document
34
+ without needing any tricks for switching character sets. This web page is
35
+ encoded directly in UTF-8.
36
+
37
+ <p>
38
+
39
+ As shown <a href="glass.html">HERE</a>,
40
+ Columbia University's <a href="k95.html">Kermit 95</a> terminal emulation
41
+ software can display UTF-8 plain text in Windows 95, 98, ME, NT, XP, or 2000
42
+ when using a monospace Unicode font like <a
43
+ href="http://www.monotype.com">Andale Mono WT J</a> or <a
44
+ href="http://www.evertype.com/emono/">Everson Mono Terminal</a>, or the lesser
45
+ populated Courier New, Lucida Console, or Andale Mono. <a
46
+ href="ckermit.html">C-Kermit</a> can handle it too,
47
+ <a href="http://www.cl.cam.ac.uk/~mgk25/unicode.html">if you have a Unicode
48
+ display</a>. As many languages as are representable in your font can be seen
49
+ on the screen at the same time.
50
+
51
+ <p>
52
+
53
+ This, however, is a Web page. Some Web browsers can handle UTF-8, some can't.
54
+ And those that can might not have a sufficiently populated font to work with
55
+ (some browsers might pick glyphs dynamically from multiple fonts; Netscape 6
56
+ seems to do this).
57
+ <a href="http://www.alanwood.net/unicode/fonts.html">CLICK HERE</a>
58
+ for a survey of Unicode fonts for Windows.
59
+
60
+ <p>
61
+
62
+ The subtitle above shows currency symbols of many lands. If they don't
63
+ appear as blobs, we're off to a good start!
64
+
65
+ <hr>
66
+ <h3><a name="poetry">Poetry</a></h3>
67
+
68
+ From the Anglo-Saxon <a href="http://www.ragweedforge.com/poems.html"><cite>Rune Poem</cite></a> (Rune version):
69
+ <p><blockquote>
70
+ ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ<br>
71
+ ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ<br>
72
+ ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬<br>
73
+ </blockquote>
74
+ <p>
75
+
76
+ From Laȝamon's<i> <a href="http://mesl.itd.umich.edu/b/brut/">Brut</a></i>
77
+ (<i>The Chronicles of England</i>, Middle English, West Midlands):
78
+ <p>
79
+ <blockquote>
80
+ An preost wes on leoden, Laȝamon was ihoten<br>
81
+ He wes Leovenaðes sone -- liðe him be Drihten.<br>
82
+ He wonede at Ernleȝe at æðelen are chirechen,<br>
83
+ Uppen Sevarne staþe, sel þar him þuhte,<br>
84
+ Onfest Radestone, þer he bock radde.
85
+ </blockquote>
86
+ <p>
87
+
88
+ (The third letter in the author's name is Yogh, missing from many fonts;
89
+ <a href="st-erkenwald.html">CLICK HERE</a> for another Middle English sample
90
+ with some explanation of letters and encoding).
91
+
92
+ <p>
93
+
94
+ From the <cite>Tagelied</cite> of
95
+
96
+ <a href="http://gutenberg.spiegel.de/autoren/eschenba.htm">
97
+ <b>Wolfram von Eschenbach</b></a> (Middle High German):
98
+ <p><blockquote>
99
+ Sîne klâwen durh die wolken sint geslagen,<br>
100
+ er stîget ûf mit grôzer kraft,<br>
101
+ ich sih in grâwen tägelîch als er wil tagen,<br>
102
+ den tac, der im geselleschaft<br>
103
+ erwenden wil, dem werden man,<br>
104
+ den ich mit sorgen în verliez.<br>
105
+ ich bringe in hinnen, ob ich kan.<br>
106
+ sîn vil manegiu tugent michz leisten hiez.<br>
107
+ </blockquote><p>
108
+
109
+ Some lines of
110
+ <a href="http://users.hol.gr/~artemis/odysseas_elytis.htm">
111
+ <b>Odysseus Elytis</b></a> (Greek):
112
+
113
+ <blockquote>
114
+ Τη γλώσσα μου έδωσαν ελληνική<br>
115
+ το σπίτι φτωχικό στις αμμουδιές του Ομήρου.<br>
116
+ Μονάχη έγνοια η γλώσσα μου στις αμμουδιές του Ομήρου.<br>
117
+ <p>
118
+ από το Άξιον Εστί<br>
119
+ του Οδυσσέα Ελύτη
120
+ </blockquote>
121
+
122
+ <p>
123
+
124
+ The first stanza of
125
+ <a href="http://www.ocf.berkeley.edu/%7Eleong/Russkaya%20Literatura/Aleksandr%20Sergeevich%20Pushkin.htm"><b>Pushkin</b></a>'s <cite>Bronze Horseman</cite> (Russian):<br>
126
+ <p><blockquote>
127
+ На берегу пустынных волн<br>
128
+ Стоял он, дум великих полн,<br>
129
+ И вдаль глядел. Пред ним широко<br>
130
+ Река неслася; бедный чёлн<br>
131
+ По ней стремился одиноко.<br>
132
+ По мшистым, топким берегам<br>
133
+ Чернели избы здесь и там,<br>
134
+ Приют убогого чухонца;<br>
135
+ И лес, неведомый лучам<br>
136
+ В тумане спрятанного солнца,<br>
137
+ Кругом шумел.<br>
138
+ </blockquote><p>
139
+
140
+ <a href="http://www.compling.hu-berlin.de/~johannes/mxedruli/"><b>Šota Rustaveli</b></a>'s Veṗxis Ṭq̇aosani,
141
+ ̣︡Th, <cite>The Knight in the Tiger's Skin</cite> (Georgian):<p>
142
+ <blockquote>
143
+ ვეპხის ტყაოსანი
144
+ შოთა რუსთაველი
145
+ <p>
146
+ ღმერთსი შემვედრე, ნუთუ კვლა დამხსნას სოფლისა შრომასა,
147
+ ცეცხლს, წყალსა და მიწასა, ჰაერთა თანა მრომასა;
148
+ მომცნეს ფრთენი და აღვფრინდე, მივჰხვდე მას ჩემსა ნდომასა,
149
+ დღისით და ღამით ვჰხედვიდე მზისა ელვათა კრთომაასა.
150
+ </blockquote>
151
+ <p>
152
+
153
+ Tamil poetry of Cupiramaniya Paarathiyar,
154
+
155
+ சுப்ரமணிய பாரதியார் (1882-1921):
156
+
157
+ <p>
158
+ <blockquote>
159
+
160
+ யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம், <br>
161
+ பாமரராய் விலங்குகளாய், உலகனைத்தும் இகழ்ச்சிசொலப் பான்மை கெட்டு, <br>
162
+ நாமமது தமிழரெனக் கொண்டு இங்கு வாழ்ந்திடுதல் நன்றோ? சொல்லீர்!<br
163
+ தேமதுரத் தமிழோசை உலகமெலாம் பரவும்வகை செய்தல் வேண்டும்.
164
+
165
+ <p>
166
+
167
+ </blockquote>
168
+
169
+ <hr>
170
+ <h3><a name="glass">I Can Eat Glass</a></h3>
171
+
172
+ And from the sublime to the ridiculous, here is a
173
+ <a href="#notes">certain phrase&sup1;</a> in an assortment of languages:
174
+
175
+ <p>
176
+ <ol>
177
+ <li><b>Sanskrit</b>: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥
178
+
179
+ <li><b>Sanskrit</b> <i>(standard transcription):</i> kācaṃ śaknomyattum; nopahinasti mām.
180
+ <li><b>Classical Greek</b>: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει.
181
+ <li><b>Greek</b>: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
182
+ <br><b>Etruscan</b>: (NEEDED)
183
+ <li><b>Latin</b>: Vitrum edere possum; mihi non nocet.
184
+ <li><b>Old French</b>: Je puis mangier del voirre. Ne me nuit.
185
+ <li><b>French</b>: Je peux manger du verre, ça ne me fait pas de mal.
186
+ <li><b>Provençal / Occitan</b>: Pòdi manjar de veire, me nafrariá pas.
187
+ <li><b>Québécois</b>: J'peux manger d'la vitre, ça m'fa pas mal.
188
+ <li><b>Walloon</b>: Dji pou magnî do vêre, çoula m' freut nén må.
189
+ <br><b>Champenois</b>: (NEEDED)
190
+ <br><b>Lorrain</b>: (NEEDED)
191
+ <li><b>Picard</b>: Ch'peux mingi du verre, cha m'foé mie n'ma.
192
+ <br><b>Corsican</b>: (NEEDED)
193
+ <br><b>J&egrave;rriais</b>: (NEEDED)
194
+ <li><b>Kreyòl Ayisyen</b>: Mwen kap manje vè, li pa blese'm.
195
+ <li><b>Basque</b>: Kristala jan dezaket, ez dit minik ematen.
196
+ <li><b>Catalan / Català</b>: Puc menjar vidre, que no em fa mal.
197
+ <li><b>Spanish</b>: Puedo comer vidrio, no me hace daño.
198
+ <li><b>Aragones</b>: Puedo minchar beire, no me'n fa mal .
199
+ <li><b>Galician</b>: Eu podo xantar cristais e non cortarme.
200
+ <li><b>Portuguese</b>: Posso comer vidro, não me faz mal.
201
+ <li><b>Brazilian Portuguese</b> (<a href="#notes">7</a>):
202
+ Posso comer vidro, não me machuca.
203
+ <li><b>Caboverdiano</b>: M' podê cumê vidru, ca ta maguâ-m'.
204
+ <li><b>Papiamentu</b>: Ami por kome glas anto e no ta hasimi daño.
205
+ <li><b>Italian</b>: Posso mangiare il vetro e non mi fa male.
206
+ <li><b>Milanese</b>: Sôn bôn de magnà el véder, el me fa minga mal.
207
+ <li><b>Roman</b>: Me posso magna' er vetro, e nun me fa male.
208
+ <li><b>Napoletano</b>: M' pozz magna' o'vetr, e nun m' fa mal.
209
+ <li><b>Sicilian</b>: Puotsu mangiari u vitru, nun mi fa mali.
210
+ <li><b>Venetian</b>: Mi posso magnare el vetro, no'l me fa mae.
211
+ <li><b>Zeneise</b> <i>(Genovese):</i> Pòsso mangiâ o veddro e o no me fà mâ.
212
+ <br><b>Rheto-Romance / Romansch</b>: (NEEDED)
213
+ <br><b>Romany / Tsigane</b>: (NEEDED)
214
+ <li><b>Romanian</b>: Pot să mănânc sticlă și ea nu mă rănește.
215
+ <li><b>Esperanto</b>: Mi povas manĝi vitron, ĝi ne damaĝas min.
216
+ <br><b>Pictish</b>: (NEEDED)
217
+ <br><b>Breton</b>: (NEEDED)
218
+ <li><b>Cornish</b>: Mý a yl dybry gwéder hag éf ny wra ow ankenya.
219
+ <li><b>Welsh</b>: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi.
220
+ <li><b>Manx Gaelic</b>: Foddym gee glonney agh cha jean eh gortaghey mee.
221
+ <li><b>Old Irish</b> <i>(Ogham):</i> ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜
222
+ <li><b>Old Irish</b> <i>(Latin):</i> Con·iccim ithi nglano. Ním·géna.
223
+
224
+ <li><b>Irish</b>: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom.
225
+
226
+ <li><b>Scottish Gaelic</b>: S urrainn dhomh gloinne ithe; cha ghoirtich i mi.
227
+ <li><b>Anglo-Saxon</b> <i>(Runes):</i>
228
+ ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬
229
+ <li><b>Anglo-Saxon</b> <i>(Latin):</i> Ic mæg glæs eotan ond hit ne hearmiað me.
230
+ <li><b>Middle English</b>: Ich canne glas eten and hit hirtiþ me nouȝt.
231
+ <li><b>English</b>: I can eat glass and it doesn't hurt me.
232
+ <li><b>English</b> <i>(IPA):</i> [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation)
233
+ <li><b>English</b> <i>(Braille):</i> ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑
234
+ <li><b>Lalland Scots / Doric</b>: Ah can eat gless, it disnae hurt us.
235
+ <br><b>Glaswegian</b>: (NEEDED)
236
+ <li><b>Gothic</b> (<a href="#notes">4</a>):
237
+ 𐌼𐌰𐌲
238
+ 𐌲𐌻𐌴𐍃
239
+ 𐌹̈𐍄𐌰𐌽,
240
+ 𐌽𐌹
241
+ 𐌼𐌹𐍃
242
+ 𐍅𐌿
243
+ 𐌽𐌳𐌰𐌽
244
+ 𐌱𐍂𐌹𐌲𐌲𐌹𐌸.
245
+ <li><b>Old Norse</b> <i>(Runes):</i> ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ
246
+ ᚧ ᚷᛚᛖᚱ ᛘᚾ
247
+ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ
248
+ ᚱᚧᚨ ᛋᚨᚱ
249
+
250
+ <li><b>Old Norse</b> <i>(Latin):</i> Ek get etið gler án þess að verða sár.
251
+
252
+ <li><b>Norsk / Norwegian (Nynorsk):</b> Eg kan eta glas utan å skada meg.
253
+ <li><b>Norsk / Norwegian (Bokmål):</b> Jeg kan spise glass uten å skade meg.
254
+ <br><b>Føroyskt / Faroese</b>: (NEEDED)
255
+ <li><b>Íslenska / Icelandic</b>: Ég get etið gler án þess að meiða mig.
256
+ <li><b>Svenska / Swedish</b>: Jag kan äta glas utan att skada mig.
257
+ <li><b>Dansk / Danish</b>: Jeg kan spise glas, det gør ikke ondt på mig.
258
+ <li><b>Soenderjysk</b>: Æ ka æe glass uhen at det go mæ naue.
259
+ <li><b>Frysk / Frisian</b>: Ik kin glês ite, it docht me net sear.
260
+ <!-- <li><b>Nederlands / Dutch</b>: Ik kan glas eten, het doet mij geen pijn. -->
261
+ <!-- <li><b>Nederlands / Dutch</b>: Ik kan glas eten zonder dat het
262
+
263
+ schaadt. -->
264
+ <!-- <li><tt>Dutch: Ik kan glas eten, maar dat doet mij geen kwaad.</tt> -->
265
+ <li><b>Nederlands / Dutch</b>: Ik kan glas eten, het doet
266
+
267
+ geen kwaad.
268
+
269
+
270
+ <LI><B>Kirchröadsj/Bôchesserplat</B>: Iech ken glaas èèse, mer 't deet miech
271
+ jing pieng.</LI>
272
+
273
+ <li><b>Afrikaans</b>: Ek kan glas eet, maar dit doen my nie skade nie.
274
+ <li><b>Lëtzebuergescht / Luxemburgish</b>: Ech kan Glas iessen, daat deet mir nët wei.
275
+ <li><b>Deutsch / German</b>: Ich kann Glas essen, ohne mir weh zu tun.
276
+ <li><b>Ruhrdeutsch</b>: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.
277
+ <li><b>Langenfelder Platt</b>:
278
+ Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd.
279
+ <li><b>Lausitzer Mundart</b> ("Lusatian"): Ich koann Gloos assn und doas
280
+ dudd merr ni wii.
281
+ <li><b>Odenwälderisch</b>: Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud.
282
+ <li><b>Sächsisch / Saxon</b>: 'sch kann Glos essn, ohne dass'sch mer wehtue.
283
+ <li><b>Pfälzisch</b>: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.
284
+ <li><b>Schwäbisch / Swabian</b>: I kå Glas frässa, ond des macht mr nix!
285
+ <li><b>Bayrisch / Bavarian</b>: I koh Glos esa, und es duard ma ned wei.
286
+ <li><b>Allemannisch</b>: I kaun Gloos essen, es tuat ma ned weh.
287
+ <li><b>Schwyzerdütsch</b>: Ich chan Glaas ässe, das tuet mir nöd weeh.
288
+ <li><b>Hungarian</b>: Meg tudom enni az üveget, nem lesz tőle bajom.
289
+ <li><b>Suomi / Finnish</b>: Voin syödä lasia, se ei vahingoita minua.
290
+ <li><b>Sami (Northern)</b>: Sáhtán borrat lása, dat ii leat bávččas.
291
+ <li><b>Erzian</b>: Мон ярсан
292
+ суликадо, ды
293
+ зыян
294
+ эйстэнзэ а
295
+ ули.
296
+ <br><b>Karelian</b>: (NEEDED)
297
+ <br><b>Vepsian</b>: (NEEDED)
298
+ <br><b>Votian</b>: (NEEDED)
299
+ <br><b>Livonian</b>: (NEEDED)
300
+ <li><b>Estonian</b>: Ma võin klaasi süüa, see ei tee mulle midagi.
301
+ <li><b>Latvian</b>: Es varu ēst stiklu, tas man nekaitē.
302
+ <li><b>Lithuanian</b>: Aš galiu valgyti stiklą ir jis manęs nežeidžia
303
+ <br><b>Old Prussian</b>: (NEEDED)
304
+ <br><b>Sorbian</b> (Wendish): (NEEDED)
305
+ <li><b>Czech</b>: Mohu jíst sklo, neublíží mi.
306
+ <li><b>Slovak</b>: Môžem jesť sklo. Nezraní ma.
307
+ <li><b>Polska / Polish</b>: Mogę jeść szkło i mi nie szkodzi.
308
+ <li><b>Slovenian:</b> Lahko jem steklo, ne da bi mi škodovalo.
309
+ <li><b>Croatian</b>: Ja mogu jesti staklo i ne boli me.
310
+ <li><b>Serbian</b> <i>(Latin):</i> Mogu jesti staklo a da mi ne škodi.
311
+ <li><b>Serbian</b> <i>(Cyrillic):</i> Могу јести стакло
312
+ а
313
+ да ми
314
+ не
315
+ шкоди.
316
+ <li><b>Macedonian:</b> Можам да јадам стакло, а не ме штета.
317
+ <li><b>Russian</b>: Я могу есть стекло, оно мне не вредит.
318
+ <li><b>Belarusian</b> <i>(Cyrillic):</i> Я магу есці шкло, яно мне не шкодзіць.
319
+ <li><b>Belarusian</b> <i>(Lacinka):</i> Ja mahu jeści škło, jano mne ne škodzić.
320
+ <li><b>Ukrainian</b>: Я можу їсти шкло, й воно мені не пошкодить.
321
+ <!-- <li><b>Bulgarian</b>: Мога да ям стъкло и не ме боли. -->
322
+ <li><b>Bulgarian</b>: Мога да ям стъкло, то не ми вреди.
323
+
324
+ <li><b>Georgian</b>: მინას ვჭამ და არა მტკივა.
325
+ <li><b>Armenian</b>: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
326
+ <li><b>Albanian</b>: Unë mund të ha qelq dhe nuk më gjen gjë.
327
+ <li><b>Turkish</b>: Cam yiyebilirim, bana zararı dokunmaz.
328
+ <li><b>Turkish</b> <i>(Ottoman):</i> جام ييه بلورم بڭا ضررى طوقونمز
329
+ <li><b>Bangla / Bengali</b>:
330
+ আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না।
331
+ <li><b>Marathi</b>: मी काच खाऊ शकतो, मला ते दुखत नाही.
332
+ <li><b>Hindi</b>: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
333
+ <li><b>Tamil</b>: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது.
334
+
335
+ <li><b>Urdu</b><a href="#notes">(2)</a>: <span dir="RTL" lang=UR>
336
+ میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔</span>
337
+ <li><b>Pashto</b><a href="#notes">(2)</a>: زه شيشه خوړلې شم، هغه ما نه خوږوي
338
+ <li><b>Farsi / Persian</b>: .من می توانم بدونِ احساس درد شيشه بخورم
339
+ <li><b>Arabic</b><a href="#notes">(2)</a>: <span dir="RTL" lang=AR>أنا قادر على أكل الزجاج و هذا لا يؤلمني.</span>
340
+ <br><B>Aramaic</B>: (NEEDED)
341
+ <li><B>Hebrew</B><a href="#notes">(2)</a>: <SPAN dir=rtl lang=HE>אני יכול לאכול זכוכית וזה לא מזיק לי.</SPAN>
342
+ <li><B>Yiddish</B><a href="#notes">(2)</a>: <SPAN dir=rtl lang=JI>איך קען עסן גלאָז און עס טוט מיר נישט װײ.</SPAN>
343
+ <br><b>Judeo-Arabic</b>: (NEEDED)
344
+ <br><b>Ladino</b>: (NEEDED)
345
+ <br><b>Gǝʼǝz</b>: (NEEDED)
346
+ <br><b>Amharic</b>: (NEEDED)
347
+ <li><b>Twi</b>: Metumi awe tumpan, ɜnyɜ me hwee.
348
+ <li><b>Hausa</b> (<i>Latin</i>): Inā iya taunar gilāshi kuma in gamā lāfiyā.
349
+ <li><b>Hausa</b> (<i>Ajami</i>) <a href="#notes">(2)</a>: <SPAN dir=rtl lang=HA>
350
+ إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا</SPAN>
351
+ <li><b>Yoruba</b><a href="#notes">(3)</a>: Mo lè je̩ dígí, kò ní pa mí lára.
352
+ <li><b>(Ki)Swahili</b>: Naweza kula bilauri na sikunyui.
353
+
354
+ <li><b>Malay</b>: Saya boleh makan kaca dan ia tidak mencederakan saya.
355
+ <li><b>Tagalog</b>: Kaya kong kumain nang bubog at hindi ako masaktan.
356
+ <li><b>Chamorro</b>: Siña yo' chumocho krestat, ti ha na'lalamen yo'.
357
+ <li><b>Javanese</b>: Aku isa mangan beling tanpa lara.
358
+ <li><b>Burmese</b>:
359
+ က္ယ္ဝန္‌တော္‌၊က္ယ္ဝန္‌မ မ္ယက္‌စားနုိင္‌သည္‌။ ၎က္ရောင္‌့
360
+ ထိခုိက္‌မ္ဟု မရ္ဟိပာ။
361
+ (7)
362
+
363
+ <li><B>Vietnamese (quốc ngữ)</B>: Tôi có thể ăn thủy tinh mà không hại gì.
364
+ <li><B>Vietnamese (nôm)</B> (<a href="#notes">4</a>): 些 𣎏 世 咹 水 晶 𦓡 空 𣎏 害 咦
365
+ <br><b>Khmer</b>: (NEEDED)
366
+ <br><b>Lao</b>: (NEEDED)
367
+ <li><b>Thai</b>: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ
368
+ <li><b>Mongolian</b> <i>(Cyrillic):</i> Би шил идэй чадна, надад хортой биш
369
+ <li><b>Mongolian</b> <i>(Classic) (<a href="#notes">5</a>):</i>
370
+ ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ
371
+ <br><b>Dzongkha</b>: (NEEDED)
372
+ <br><b>Nepali</b>: (NEEDED)
373
+ <li><b>Tibetan</b>: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།
374
+ <li><b>Chinese</b>: <span lang=zh>我能吞下玻璃而不伤身体。</span>
375
+ <li><b>Chinese</b> (Traditional): 我能吞下玻璃而不傷身體。
376
+
377
+ <li><b>Taiwanese</b><a href="#notes">(6)</a>: Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong.
378
+ <li><b>Japanese</b>: <span lang=ja>私はガラスを食べられます。それは私を傷つけません。</span>
379
+ <li><b>Korean</b>: <span lang=ko>나는 유리를 먹을 수 있어요. 그래도 아프지 않아요</span>
380
+ <li><b>Bislama</b>: Mi save kakae glas, hemi no save katem mi.<br>
381
+ <li><b>Hawaiian</b>: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha.<br>
382
+ <li><b>Marquesan</b>: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau.
383
+ <li><b>Chinook Jargon:</b> Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay.
384
+ <li><b>Navajo</b>: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da.
385
+ <br><b>Cherokee</b> <i>(and Cree, Ojibwa, Inuktitut, and other Native American languages):</i> (NEEDED)
386
+ <br><b>Garifuna</b>: (NEEDED)
387
+ <br><b>Gullah</b>: (NEEDED)
388
+ <li><b>Lojban</b>: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi
389
+ <li><b>Nórdicg</b>: Lj&#339;r ye caudran créneþ ý jor c&#7811;ran.
390
+ </ol>
391
+ <p>
392
+
393
+ <i>(Additions, corrections, completions,</i>
394
+ <a href="mailto:kermit@columbia.edu"><i>gratefully accepted</i></a><i>.)</i>
395
+
396
+ <p>
397
+ For testing purposes, some of these are repeated in a <b>monospace font</b>&nbsp;.&nbsp;.&nbsp;.
398
+ <p>
399
+ <ol>
400
+ <li><tt>Euro Symbol: €.</tt>
401
+ <li><tt>Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.</tt>
402
+ <li><tt>Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.</tt>
403
+
404
+ <li><tt>Polish: Mogę jeść szkło, i mi nie szkodzi.</tt>
405
+ <li><tt>Romanian: Pot să mănânc sticlă și ea nu mă rănește.</tt>
406
+ <li><tt>Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.</tt>
407
+ <li><tt>Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։</tt>
408
+ <li><tt>Georgian: მინას ვჭამ და არა მტკივა.</tt>
409
+ <li><tt>Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.</tt>
410
+ <li><tt>Hebrew<a href="#notes">(2)</a>: <SPAN dir=rtl lang=HE>אני יכול לאכול זכוכית וזה לא מזיק לי.</SPAN></tt>
411
+ <li><tt>Yiddish<a href="#notes">(2)</a>: <SPAN dir=rtl lang=JI>איך קען עסן גלאָז און עס טוט מיר נישט װײ.</SPAN></tt>
412
+ <li><tt>Arabic<a href="#notes">(2)</a>: <span dir="RTL" lang=AR>أنا قادر على أكل الزجاج و هذا لا يؤلمني.</span></tt>
413
+ <li><tt>Japanese: <span lang=ja>私はガラスを食べられます。それは私を傷つけません。</span></tt>
414
+ <li><tt>Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ</tt>
415
+ </ol>
416
+ <p>
417
+
418
+ <b><a name="notes">Notes:</a></b>
419
+
420
+ <p>
421
+ <ol>
422
+
423
+ <li>The "I can eat glass" phrase and initial translations (about 30 of them)
424
+ were borrowed from Ethan Mollick's <a
425
+ href="http://hcs.harvard.edu/~igp/glass.html">I Can Eat Glass</a> page
426
+ (which disappeared on or about June 2004) and converted to UTF-8. Since
427
+ Ethan's original page is gone, I should mention that his purpose was to offer
428
+ travelers a phrase they could use in any country that would command a
429
+ certain kind of respect, or at least get attention. See <a
430
+ href="#credits">Credits</a> for the many additional contributions since
431
+ then. When submitting new entries, the word "hurt" (if you have a choice)
432
+ is used in the sense of "cause harm", "do damage", or "bother", rather than
433
+ "inflict pain" or "make sad". In this vein Otto Stolz comments (as do
434
+ others further down; personally I think it's better for the purpose of this
435
+ page to have extra entries and/or to show a greater repertoire of characters
436
+ than it is to enforce a strict interpretation of the word "hurt"!):
437
+
438
+ <p>
439
+ <object>
440
+ <blockquote>
441
+ <small>
442
+
443
+ This is the meaning I have translated to the Swabian dialect.
444
+
445
+ However, I just have noticed that most of the German variants
446
+ translate the "inflict pain" meaning. The German example should rather
447
+ read:
448
+
449
+ <p>
450
+ <blockquote>
451
+ "Ich kann Glas essen ohne mir zu schaden."
452
+ </blockquote>
453
+ <p>
454
+
455
+ (The comma fell victim to the 1996 orthographic reform,
456
+ cf. <a href="http://www.ids-mannheim.de/reform/e3-1.html#P76"><tt>http://www.ids-mannheim.de/reform/e3-1.html#P76</tt></a>.)
457
+
458
+ <p>
459
+
460
+ You may wish to contact the contributors of the following translations
461
+ to correct them:
462
+
463
+ <p>
464
+ <ul>
465
+
466
+ <li> Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei.
467
+ <li> Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und doas dudd merr ni wii.
468
+ <li> Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue.
469
+ <li> Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei.
470
+ <li> Allemannisch: I kaun Gloos essen, es tuat ma ned weh.
471
+ <li> Schwyzerdütsch: Ich chan Glaas ässe, das tuet mir nöd weeh.
472
+ </ul>
473
+ <p>
474
+
475
+ In contrast, I deem the following translations *alright*:
476
+
477
+ <p>
478
+ <ul>
479
+
480
+ <li> Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.
481
+ <li> Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.
482
+ <li> Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix!
483
+ </ul>
484
+ <p>
485
+
486
+ (However, you could remove the commas, on account of
487
+ <a href="http://www.ids-mannheim.de/reform/e3-1.html#P76"><tt>http://www.ids-mannheim.de/reform/e3-1.html#P76</tt></a>
488
+ and
489
+
490
+ <a href="http://www.ids-mannheim.de/reform/e3-1.html#P72"><tt>http://www.ids-mannheim.de/reform/e3-1.html#P72</tt></a>, respectively.)
491
+
492
+ <p>
493
+
494
+ I guess, also these examples translate the <i>wrong</i> sense of "hurt",
495
+ though I do not know these languages well enough to assert them
496
+ definitely:
497
+
498
+ <p>
499
+ <ul>
500
+
501
+ <li> Nederlands / Dutch: Ik kan glas eten; het doet mij geen
502
+ pijn. <i>(This one has been changed)</i>
503
+ <li> Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng.
504
+
505
+ </ul>
506
+ <p>
507
+
508
+ In the Romanic languages, the variations on "fa male" (it) are probably
509
+ wrong, whilst the variations on "hace daño" (es) and "damaĝas" (Esperanto) are probably correct; "nocet" (la) is definitely right.
510
+
511
+ <p>
512
+
513
+ The northern Germanic variants of "skada" are probably right, as are
514
+ the Slavic variants of "škodi/шкоди" (se); however the Slavic variants
515
+ of " boli" (hv) are probably wrong, as "bolena" means "pain/ache", IIRC.
516
+
517
+ </small>
518
+ </blockquote>
519
+ </object>
520
+ <p>
521
+
522
+ The numbering of the samples is arbitrary, done only to keep track of how
523
+ many there are, and can change any time a new entry is added. The
524
+ arrangement is also arbitrary but with some attempt to group related
525
+ examples together. Note: All languages not listed are wanted, not just the
526
+ ones that say (NEEDED).
527
+
528
+ <li><a name="note1">Correct right-to-left display of these languages
529
+ depends on the capabilities of your browser.</a> The period should
530
+ appear on the left. In the monospace Yiddish example, the Yiddish digraphs
531
+ should occupy one character cell.
532
+
533
+ <li>Yoruba: The third word is Latin letter small 'j' followed by
534
+ small 'e' with U+0329, Combining Vertical Line Below. This displays
535
+ correctly only if your Unicode font includes the U+0329 glyph and your
536
+ browser supports combining diacritical marks. The Indic examples
537
+ also include combining sequences.
538
+
539
+ <li>Includes Unicode 3.1 (or later) characters beyond Plane 0.
540
+
541
+ <li>The Classic Mongolian example should be vertical, top-to-bottom and
542
+ left-to-right. But such display is almost impossible. Also no font yet
543
+ exists which provides the proper ligatures and positional variants for the
544
+ characters of this script, which works somewhat like Arabic.
545
+
546
+ <li>Taiwanese is also known as Holo or Hoklo, and is related to Southern
547
+ Min dialects such as Amoy.
548
+ Contributed by Henry H. Tan-Tenn, who comments, "The above is
549
+ the romanized version, in a script current among Taiwanese Christians since
550
+ the mid-19th century. It was invented by British missionaries and saw use in
551
+ hundreds of published works, mostly of a religious nature. Most Taiwanese did
552
+ not know Chinese characters then, or at least not well enough to read. More
553
+ to the point, though, a written standard using Chinese characters has never
554
+ developed, so a significant minority of words are represented with different
555
+ candidate characters, depending on one's personal preference or etymological
556
+ theory. In this sentence, for example, "-tàng", "chia̍h",
557
+ "mā" and "bē" are problematic using Chinese characters.
558
+ "Góa" (I/me) and "po-lê" (glass) are as written in other Sinitic
559
+ languages (e.g. Mandarin, Hakka)."
560
+
561
+ <li>Wagner Amaral of Pinese &amp; Amaral Associados notes that
562
+ the Brazilian Portuguese sentence for
563
+ "I can eat glass" should be identical to the Portuguese one, as the word
564
+ "machuca" means "inflict pain", or rather "injuries". The words "faz
565
+ mal" would more correctly translate as "cause harm".
566
+
567
+ <li>Burmese: In English the first person pronoun "I" stands for both
568
+ genders, male and female. In Burmese (except in the central part of Burma) it is
569
+ kyundaw (<font
570
+ size="+1"
571
+ face="Padauk">က္ယ္ဝန္‌တော္‌</font>) for male and kyanma (<font
572
+ size="+1" face="Padauk">က္ယ္ဝန္‌မ</font>) for female.
573
+ Using here a fully-compliant Unicode Burmese font -- sadly one and only Padauk
574
+ Graphite font exists -- rendering using graphite engine.
575
+ <a href="http://h1.ripway.com/bamarsar/">CLICK HERE</a> to test Burmese
576
+ characters.
577
+
578
+ </ol>
579
+
580
+ <hr>
581
+ <h3><a name="quickbrownfox">The Quick Brown Fox</a></h3>
582
+
583
+ The "I can eat glass" sentences do not necessarily show off the orthography of
584
+ each language to best advantage. In many alphabetic written languages it is
585
+ possible to include all (or most) letters (or "special" characters) in
586
+ a single (often nonsense) <i>pangram</i>. These were traditionally used in
587
+ typewriter instruction; now they are useful for stress-testing computer fonts
588
+ and keyboard input methods. Here are a few examples (SEND MORE):
589
+
590
+ <p>
591
+ <ol>
592
+
593
+ <li><b>English:</b> The quick brown fox jumps over the lazy dog.
594
+ <li><b>Irish:</b> "An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó
595
+ ṡlí do leasa ṫú?"
596
+ "D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ."
597
+ <li><b>Dutch:</b> Pa's wijze lynx bezag vroom het fikse aquaduct.
598
+ <li><b>German: </b> Falsches Üben von Xylophonmusik quält jeden
599
+ größeren Zwerg. (1)
600
+ <li><b>German: </b> <span lang=da>Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon.</span> (2)
601
+ <li><b>Swedish:</b> Flygande bäckasiner söka strax hwila på mjuka tuvor.
602
+ <li><b>Icelandic:</b> Sævör grét áðan því úlpan var ónýt.
603
+ <li><b>Polish:</b> Pchnąć w tę łódź jeża lub ośm skrzyń fig.
604
+ <li><b>Czech:</b> Příliš
605
+ žluťoučký kůň úpěl
606
+ ďábelské kódy.
607
+ <li><b>Slovak:</b> Starý kôň na hŕbe
608
+ kníh žuje tíško povädnuté
609
+ ruže, na stĺpe sa ďateľ
610
+ učí kvákať novú ódu o
611
+ živote.
612
+ <li><b>Russian:</b> В чащах
613
+ юга жил-был
614
+ цитрус? Да,
615
+ но
616
+ фальшивый
617
+ экземпляр!
618
+ ёъ.
619
+
620
+ <li><b>Bulgarian:</b> Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон.
621
+
622
+ <li><b>Sami (Northern):</b> Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža.
623
+ <li><b>Hungarian:</b> Árvíztűrő tükörfúrógép.
624
+ <li><b>Spanish:</b> El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.
625
+ <li><b>Portuguese:</b> O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico. (3)
626
+ <li><b>French:</b> Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être
627
+ déçus et de voir leurs drôles d'œufs abîmés.
628
+
629
+ <li><b>Esperanto:</b> Eĥoŝanĝo
630
+ ĉiuĵaŭde.
631
+
632
+ <li><b>Hebrew:</b> <span dir="RTL" lang=HE>זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.</span>
633
+
634
+ <li><b>Japanese</b> (Hiragana):<blockquote>
635
+ いろはにほへど ちりぬるを<br>
636
+ わがよたれぞ つねならむ<br>
637
+ うゐのおくやま けふこえて<br>
638
+ あさきゆめみじ ゑひもせず
639
+ (4)
640
+ </blockquote>
641
+
642
+ </ol>
643
+ <p>
644
+ <a name="notes2"><b>Notes:</b></a>
645
+ <p>
646
+ <ol>
647
+
648
+ <li>Other phrases commonly used in Germany include: "Ein wackerer Bayer
649
+ vertilgt ja bequem zwo Pfund Kalbshaxe" and, more recently, "Franz jagt im
650
+ komplett verwahrlosten Taxi quer durch Bayern", but both lack umlauts and
651
+ eszett. Previously, going for the shortest sentence that has all the
652
+ umlauts and special characters, I had
653
+ "Grüße aus Bärenhöfe
654
+ (und Óechtringen)!"
655
+ Acute accents are not used in native German words, so I was surprised to
656
+ discover "Óechtringen" in the Deutsche Bundespost
657
+ Postleitzahlenbuch:
658
+ <p>
659
+ <blockquote>
660
+ <a href="http://www.columbia.edu/~fdc/misc/oechtringen.jpg"><img
661
+ src="oechtringen-sm.jpg" alt="Click for full-size image (2.8MB)"></a>
662
+ </blockquote>
663
+ <p>
664
+ It's a small village in eastern Lower Saxony.
665
+ The "oe" in this case
666
+ turns out to be the Lower Saxon "lengthening e" (Dehnungs-e), which makes the
667
+ previous vowel long (used in a number of Lower Saxon place names such as Soest
668
+ and Itzehoe), not the "e" that indicates umlaut of the preceding vowel.
669
+ Many thanks to the Óechtringen-Namenschreibungsuntersuchungskomitee
670
+ (Alex Bochannek, Manfred Erren, Asmus Freytag, Christoph P&auml;per, plus
671
+ Werner Lemberg who serves as
672
+ Óechtringen-Namenschreibungsuntersuchungskomiteerechtschreibungsprüfer)
673
+
674
+ for their relentless pursuit of the facts in this case. Conclusion: the
675
+ accent almost certainly does not belong on this (or any other native German)
676
+ word, but neither can it be dismissed as dirt on the page. To add to the
677
+ mystery, it has been reported that other copies of the same edition of the
678
+ PLZB do not show the accent! UPDATE (March 2006): David Krings was
679
+ intrigued enough by this report to contact the mayor of Ebstorf, of which
680
+ Oechtringen is a borough, who responded:
681
+
682
+ <p>
683
+ <blockquote style="font-family:sans-serif;font-size:80%">
684
+ Sehr geehrter Mr. Krings,<br>
685
+ wenn Oechtringen irgendwo mit einem Akzent auf dem O geschrieben wurde,
686
+ dann kann das nur ein Fehldruck sein. Die offizielle Schreibweise lautet
687
+ jedenfalls „Oechtringen“.<br>
688
+ Mit freundlichen Grüssen<br>
689
+ Der Samtgemeindebürgermeister<br>
690
+ i.A. Lothar Jessel
691
+
692
+ </blockquote>
693
+
694
+
695
+ <p>
696
+ <li>From Karl Pentzlin (Kochel am See, Bavaria, Germany):
697
+ "This German phrase is suited for display by a Fraktur (broken letter)
698
+ font. It contains: all common three-letter ligatures: ffi ffl fft and all
699
+ two-letter ligatures required by the Duden for Fraktur typesetting: ch ck ff
700
+ fi fl ft ll ſch ſi ſſ ſt tz (all in a
701
+ manner such they are not part of a three-letter ligature), one example of f-l
702
+ where German typesetting rules prohibit ligating (marked by a ZWNJ), and all
703
+ German letters a...z, ä,ö,ü,ß, ſ [long s]
704
+ (all in a manner such that they are not part of a two-letter Fraktur
705
+ ligature)."
706
+
707
+ Otto Stolz notes that "'Schloß' is now spelled 'Schloss', in
708
+ contrast to 'größer' (example 4) which has kept its
709
+ 'ß'. Fraktur has been banned from general use, in 1942, and long-s
710
+ (ſ) has ceased to be used with Antiqua (Roman) even earlier (the
711
+ latest Antiqua-ſ I have seen is from 1913, but then
712
+ I am no expert, so there may well be a later instance)." Later Otto confirms
713
+ the latter theory, "Now I've run across a book “Deutsche
714
+ Rechtschreibung” (edited by Lutz Mackensen) from 1954 (my reprint
715
+ is from 1956) that has kept the Antiqua-ſ in its dictionary part (but
716
+ neither in the preface nor in the appendix)."
717
+
718
+ <p>
719
+
720
+ <li>Diaeresis is not used in Iberian Portuguese.
721
+
722
+ <p>
723
+
724
+ <li>From Yurio Miyazawa: "This poetry contains all the sounds in the
725
+ Japanese language and used to be the first thing for children to learn in
726
+ their Japanese class. The Hiragana version is particularly neat because it
727
+ covers every character in the phonetic Hiragana character set." Yurio also
728
+ sent the Kanji version:
729
+
730
+ <p>
731
+ <blockquote>
732
+ 色は匂へど 散りぬるを<br>
733
+ 我が世誰ぞ 常ならむ<br>
734
+ 有為の奥山 今日越えて<br>
735
+ 浅き夢見じ 酔ひもせず
736
+ </blockquote>
737
+
738
+ </ol>
739
+ <p>
740
+ <b>Accented Cyrillic:</b>
741
+ <p>
742
+
743
+ <i>(This section contributed by Vladimir Marinov.)</i>
744
+
745
+ <p>
746
+
747
+ In Bulgarian it is desirable, customary, or in some cases required to
748
+ write accents over vowels. Unfortunately, no computer character sets
749
+ contain the full repertoire of accented Cyrillic letters. With Unicode,
750
+ however, it is possible to combine any Cyrillic letter with any combining
751
+ accent. The appearance of the result depends on the font and the rendering
752
+ engine. Here are two examples.
753
+
754
+ <p>
755
+ <ol>
756
+
757
+ <li>Той видя бялата коса́ по главата и́ и ко́са на рамото и́, и ре́че да и́
758
+ рече́: "Пара́та по́ па́ри от па́рата, не ща пари́!", но си поми́сли: "Хей,
759
+ помисли́ си! А́ и́ река, а́ е скочила в тази река, която щеше да тече́,
760
+ а не те́че."
761
+
762
+ <p>
763
+
764
+ <li>По пъ́тя пъту́ват кю́рди и югославя́ни.
765
+
766
+ </ol>
767
+
768
+ <hr>
769
+ <h3><a name="html">HTML Features</a></h3>
770
+
771
+ Here is the Russian alphabet (uppercase only) coded in three
772
+ different ways, which should look identical:
773
+
774
+ <p>
775
+ <ol>
776
+ <li>АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
777
+ &nbsp; <i>(Literal UTF-8)</i>
778
+ <li>&#1040;&#1041;&#1042;&#1043;&#1044;&#1045;&#1046;&#1047;&#1048;&#1049;&#1050;&#1051;&#1052;&#1053;&#1054;&#1055;&#1056;&#1057;&#1058;&#1059;&#1060;&#1061;&#1062;&#1063;&#1064;&#1065;&#1066;&#1067;&#1068;&#1069;&#1070;&#1071;
779
+ &nbsp; <i>(Decimal numeric character reference)</i>
780
+ <li>&#x0410;&#x0411;&#x0412;&#x0413;&#x0414;&#x0415;&#x0416;&#x0417;&#x0418;&#x0419;&#x041a;&#x041b;&#x041c;&#x041d;&#x041e;&#x041f;&#x0420;&#x0421;&#x0422;&#x0423;&#x0424;&#x0425;&#x0426;&#x0427;&#x0428;&#x0429;&#x042a;&#x042b;&#x042c;&#x042d;&#x042e;&#x042f;
781
+ &nbsp; <i>(Hexadecimal numeric character reference)</i>
782
+ </ol>
783
+
784
+ <p>
785
+
786
+ In another test, we use HTML language tags to distinguish Bulgarian, Russian,
787
+ and <a href="http://www.tiro.com/transfer/Serbian_Rendering.pdf">Serbian</a>,
788
+ which have different italic forms for lowercase
789
+ б, г, д, п, and/or т:
790
+ <p>
791
+ <blockquote>
792
+ <table>
793
+ <tr>
794
+ <td><b>Bulgarian</b>: &nbsp;
795
+ <td><span lang=BG>[&nbsp;бгдпт</span>&nbsp;] &nbsp;
796
+ <td><span lang=BG>[&nbsp;<i>бгдпт</i></span>&nbsp;] &nbsp;
797
+ <td><span lang=BG><i> Мога да ям стъкло и не ме боли.</i></span>
798
+ <tr>
799
+ <td><b>Russian</b>:
800
+ <td><span lang=RU>[&nbsp;бгдпт</span>&nbsp;] &nbsp;
801
+ <td><span lang=RU>[&nbsp;<i>бгдпт</i></span>&nbsp;] &nbsp;
802
+ <td><span lang=RU><i>Я могу есть стекло, это мне не вредит.</i></span>
803
+ <tr>
804
+ <td><b>Serbian</b>:
805
+ <td><span lang=SR>[&nbsp;бгдпт</span>&nbsp;] &nbsp;
806
+ <td><span lang=SR>[&nbsp;<i>бгдпт</i></span>&nbsp;] &nbsp;
807
+ <td> <span lang=SR><i>Могу јести стакло
808
+ а
809
+ да ми
810
+ не
811
+ шкоди.</i></span>
812
+ </table>
813
+ </blockquote>
814
+ <p>
815
+
816
+ <hr>
817
+ <h3><a name="credits">Credits, Tools, and Commentary</a></h3>
818
+
819
+ <dl>
820
+ <dt><b>Credits:</b></dt>
821
+ <dd>
822
+ The "I can eat glass" phrase and the initial collection of translations:
823
+ <a href="http://hcs.harvard.edu/~igp/glass.html">Ethan Mollick</a>.
824
+ Transcription / conversion to UTF-8: Frank da&nbsp;Cruz.
825
+ <b>Albanian:</b> Sindi Keesan.
826
+ <b>Afrikaans:</b> Johan Fourie, Kevin Poalses.
827
+ <b>Anglo Saxon:</b> Frank da&nbsp;Cruz.
828
+ <b>Arabic:</b> Najib Tounsi.
829
+ <b>Armenian:</b> Vaçe Kundakçı.
830
+ <b>Belarusian:</b> Alexey Chernyak.
831
+ <b>Bengali:</b> Somnath Purkayastha, Deepayan Sarkar.
832
+ <b>Bislama:</b> Dan McGarry.
833
+ <b>Braille:</b> Frank da&nbsp;Cruz.
834
+ <b>Bulgarian:</b> Sindi Keesan, Guentcho Skordev, Vladimir Marinov.
835
+ <b>Burmese:</b> "cetanapa".
836
+ <b>Cabo Verde Creole:</b> Cláudio Alexandre Duarte.
837
+ <b>Catal&aacute;n:</b> Jordi Bancells.
838
+ <b>Chinese:</b> Jack Soo, Wong Pui Lam.
839
+ <b>Chinook Jargon:</b> David Robertson.
840
+ <b>Cornish:</b> Chris Stephens.
841
+ <b>Croatian:</b> Marjan Baće.
842
+ <b>Czech:</b> Stanislav Pecha, Radovan Garabík.
843
+ <b>Dutch:</b> Peter Gotink, Pim Blokland, Rob Daniel, Rob de Wit.
844
+ <b>Erzian:</b> Jack Rueter.
845
+ <b>Esperanto:</b> Franko Luin, Radovan Garabík.
846
+ <b>Estonian:</b> Meelis Roos.
847
+ <b>Farsi/Persian:</b> Payam Elahi.
848
+ <b>Finnish:</b> Sampsa Toivanen.
849
+ <b>French:</b> Luc Carissimo, Anne Colin du&nbsp;Terrail, Sean M. Burke.
850
+ <b>Galician:</b> Laura Probaos.
851
+ <b>Georgian:</b> Giorgi Lebanidze.
852
+ <b>German:</b> Christoph Päper, Otto Stolz, Karl Pentzlin, David Krings,
853
+ Frank da&nbsp;Cruz.
854
+ <b>Gothic:</b> Aur&eacute;lien Coudurier.
855
+ <b>Greek:</b> Ariel Glenn, Constantine Stathopoulos, Siva Nataraja.
856
+ <b>Hebrew:</b> Jonathan Rosenne, Tal Barnea.
857
+ <b>Hausa:</b> Malami Buba, Tom Gewecke.
858
+ <b>Hawaiian:</b> na Hauʻoli Motta, Anela de&nbsp;Rego, Kaliko Trapp.
859
+ <b>Hindi:</b> Shirish Kalele.
860
+ <b>Hungarian:</b> András Rácz, Mark Holczhammer.
861
+ <b>Icelandic:</b> Andrés Magnússon, Sveinn Baldursson.
862
+ <b>International Phonetic Alphabet (IPA):</b> Siva Nataraja / Vincent Ramos.
863
+ <b>Irish:</b> Michael Everson, Marion Gunn, James Kass, Curtis Clark.
864
+ <b>Italian:</b> Thomas De Bellis.
865
+ <b>Japanese:</b> Makoto Takahashi, Yurio Miyazawa.
866
+ <b>Kirchröadsj:</b> Roger Stoffers.
867
+ <b>Kreyòl:</b> Sean M. Burke.
868
+ <b>Korean:</b> Jungshik Shin.
869
+ <b>Langenfelder Platt:</b> David Krings.
870
+ <b>Lëtzebuergescht:</b> Stefaan Eeckels.
871
+ <b>Lithuanian:</b> Gediminas Grigas.
872
+ <b>Lojban:</b> Edward Cherlin.
873
+ <b>Lusatian:</b> Ronald Schaffhirt.
874
+ <b>Macedonian:</b> Sindi Keesan.
875
+ <b>Malay:</b> Zarina Mustapha.
876
+ <b>Manx:</b> &Eacute;anna &Oacute; Br&aacute;daigh.
877
+ <b>Marathi:</b> Shirish Kalele.
878
+ <b>Marquesan:</b> Kaliko Trapp.
879
+ <b>Middle English:</b> Frank da&nbsp;Cruz.
880
+ <b>Milanese:</b> Marco Cimarosti.
881
+ <b>Mongolian:</b> Tom Gewecke.
882
+ <b>Napoletano:</b> Diego Quintano.
883
+ <b>Navajo:</b> Tom Gewecke.
884
+ <a href="http://www.langmaker.com/db/mdl_nordicg.htm"><b>Nórdicg</b></a>:
885
+ Y&#7811;lyan Rott.
886
+ <b>Norwegian:</b> Herman Ranes.
887
+ <b>Odenwälderisch:</b> Alexander He&szlig;.
888
+ <b>Old Irish:</b> Michael Everson.
889
+ <b>Old Norse:</b> Andrés Magnússon.
890
+ <b>Papiamentu:</b> Bianca and Denise Zanardi.
891
+ <b>Pashto:</b> N.R. Liwal.
892
+ <b>Pfälzisch:</b> Dr. Johannes Sander.
893
+ <b>Picard:</b> Philippe Mennecier.
894
+ <b>Polish:</b> Juliusz Chroboczek, Paweł Przeradowski.
895
+ <b>Portuguese:</b> "Cláudio" Alexandre Duarte, Bianca and Denise
896
+ Zanardi, Pedro Palhoto Matos, Wagner Amaral.
897
+ <b>Québécois:</b> Laurent Detillieux.
898
+ <b>Roman:</b> Pierpaolo Bernardi.
899
+ <b>Romanian:</b> Juliusz Chroboczek, Ionel Mugurel.
900
+ <b>Ruhrdeutsch:</b> "Timwi".
901
+ <b>Russian:</b> Alexey Chernyak, Serge Nesterovitch.
902
+ <b>Sami:</b> Anne Colin du&nbsp;Terrail, Luc Carissimo.
903
+ <b>Sanskrit:</b> Siva Nataraja / Vincent Ramos.
904
+ <b>Sächsisch:</b> André Müller.
905
+ <b>Schwäbisch:</b> Otto Stolz.
906
+ <b>Scots:</b> Jonathan Riddell.
907
+ <b>Serbian:</b> Sindi Keesan, Ranko Narancic, Boris Daljevic, Szilvia Csorba.
908
+ <b>Slovak:</b> G. Adam Stanislav, Radovan Garabík.
909
+ <b>Slovenian:</b> Albert Kolar.
910
+ <b>Spanish:</b> <a href="http://www.panix.com/~aleida">Aleida
911
+ Mu&ntilde;oz</a>, Laura Probaos.
912
+ <b>Swahili:</b> Ronald Schaffhirt.
913
+ <b>Swedish:</b> Christian Rose, Bengt Larsson.
914
+ <b>Taiwanese:</b> Henry H. Tan-Tenn.
915
+ <b>Tagalog:</b> Jim Soliven.
916
+ <b>Tamil:</b> Vasee Vaseeharan.
917
+ <b>Tibetan:</b> D. Germano, Tom Gewecke.
918
+ <b>Thai:</b> Alan Wood's wife.
919
+ <b>Turkish:</b> Vaçe Kundakçı, Tom Gewecke, Merlign Olnon.
920
+ <b>Ukrainian:</b> Michael Zajac.
921
+ <b>Urdu:</b> Mustafa Ali.
922
+ <a href="http://nomfoundation.org/"><b>Vietnamese</b></a>: Dixon Au,
923
+ [James] Đỗ Bá Phước
924
+ <font face="PMingLiU">&#x675c; &#x4f2f; &#x798f;</font>.
925
+ <b>Walloon:</b> Pablo Saratxaga.
926
+ <b>Welsh:</b> Geiriadur Prifysgol Cymru (Andrew).
927
+ <b>Yiddish:</b> Mark David.
928
+ <b>Zeneise:</b> Angelo Pavese.
929
+
930
+ <p>
931
+
932
+ <dt><b>Tools Used to Create This Web Page:</b></dt>
933
+
934
+ <dd>The UTF8-aware <a href="k95.html">Kermit 95</a> terminal emulator on
935
+ Windows, to a Unix host with the <a
936
+ href="http://www.gnu.org/directory/emacs.html">EMACS</a> text editor. Kermit
937
+ 95 displays UTF-8 and also allows keyboard entry of arbitrary Unicode BMP
938
+ characters as 4 hex digits, as shown <a href="glass.html">HERE</a>. Hex codes
939
+ for Unicode values can be found in <a
940
+ href="http://www.unicode.org/unicode/uni2book/u2.html">The Unicode
941
+ Standard</a> (recommended) and the <a
942
+ href="http://www.unicode.org/charts/">online code charts</a>. When
943
+ submissions arrive by email encoded in some other character set (Latin-1,
944
+ Latin-2, KOI, various PC code pages, JEUC, etc.), I use the TRANSLATE command
945
+ of <a href="ckermit.html">C-Kermit</a> on the Unix host (<a
946
+ href="safe.html">where I read my mail</a>) to convert the character set to
947
+ UTF-8 (I could also use Kermit 95 for this; it has the same TRANSLATE
948
+ command). That's it -- no "Web authoring" tools, no locales, no "smart"
949
+ anything. It's just plain text, nothing more. By the way, there's nothing
950
+ special about EMACS -- any text editor will do, providing it allows entry of
951
+ arbitrary 8-bit bytes as text, including the 0x80-0x9F "C1" range. EMACS 21.1
952
+ actually supports UTF-8; earlier versions don't know about it and display the
953
+ octal codes; either way is OK for this purpose.
954
+
955
+ <p>
956
+
957
+ <dt><b>Commentary:</b>
958
+ <dd>Date: Wed, 27 Feb 2002 13:21:59 +0100<br>
959
+ From: "Bruno DEDOMINICIS" <tt>&lt;b.dedominicis@cite-sciences.fr&gt;</tt><br>
960
+ Subject: Je peux manger du verre, cela ne me fait pas mal.
961
+
962
+ <p>
963
+
964
+ I just found out your website and it makes me feel like proposing an
965
+ interpretation of the choice of this peculiar phrase.
966
+
967
+ <p>
968
+
969
+ Glass is transparent and can hurt as everyone knows. The relation between
970
+ people and civilisations is sometimes effusional and more often rude. The
971
+ concept of breaking frontiers through globalization, in a way, is also an
972
+ attempt to deny any difference. Isn't "transparency" the flag of modernity?
973
+ Nothing should be hidden any more, authority is obsolete, and the new powers
974
+ are supposed to reign through loving and smiling and no more through
975
+ coercion...
976
+
977
+ <p>
978
+
979
+ Eating glass without pain sounds like a very nice metaphor of this attempt.
980
+ That is, frontiers should become glass transparent first, and be denied by
981
+ incorporating them. On the reverse, it shows that through globalization,
982
+ frontiers undergo a process of displacement, that is, when they are not any
983
+ more speakable, they become repressed from the speech and are therefore
984
+ incorporated and might become painful symptoms, as for example what happens
985
+ when one tries to eat glass.
986
+
987
+ <p>
988
+
989
+ The frontiers that used to separate bodies one from another tend to divide
990
+ bodies from within and make them suffer.... The chosen phrase then appears
991
+ as a denial of the symptom that might result from the destitution of
992
+ traditional frontiers.
993
+
994
+ <p>
995
+ Best,<br>
996
+ Bruno De Dominicis, Paris, France
997
+ </dl>
998
+
999
+ <p>
1000
+ <b>Other Unicode pages onsite:</b>
1001
+ <ul>
1002
+ <li><a href="http://www.columbia.edu/~fdc/pace/">Peace in All Languages</a>
1003
+ <li><a href="postal.html">Frank's Compulsive Guide to Postal Addresses</a>
1004
+ (especially the <a href="postal.html#index">Index</a>)
1005
+ <li><a href="st-erkenwald.html">Representing Middle English on the Web with UTF-8</a>
1006
+ <li><a href="biblio.html">The Kermit Bibliography</a> (in UTF-8)
1007
+ <li><a href="accents.html">Interchange of Non-English Computer Text</a>
1008
+ (UTF-8 math and box-drawing)
1009
+ <li><a href="utf8-t1.html">Unicode Table</a> (in UTF-8)
1010
+ </ul>
1011
+ <p>
1012
+ <b>Unicode samplers offsite:</b>
1013
+ <ul>
1014
+ <li>Michael Everson's
1015
+ <a href="http://www.evertype.com/scriptbib.html">Bibliography of Typography
1016
+ and Scripts</a>
1017
+ <li><a href="http://home.att.net/~jameskass/scriptlinks.htm">Sample Unicode
1018
+ Test Pages and Script Links</a>
1019
+ <li><a href="http://crism.maden.org/dunno.html">I don't know, I only work here</a>
1020
+ <li><a href="http://www.trigeminal.com/samples/provincial.html">Anyone
1021
+ can be provincial!</a>
1022
+ <li><a href="http://www.macchiato.com/unicode/Unicode_transcriptions.html">Transcriptions of "Unicode"</a>
1023
+ <li><a href="http://www.i18nguy.com/unicode-example.html">Example
1024
+ Unicode Usage for Business Applications</a>
1025
+ <li><a href="http://www.cl.cam.ac.uk/~mgk25/unicode.html#apps">UTF-8 and
1026
+ Unicode FAQ for Unix/Linux</a>
1027
+ </ul>
1028
+ <p>
1029
+ <b>Unicode fonts:</b>
1030
+ <ul>
1031
+ <li><a href="http://www.alanwood.net/unicode/fonts.html">Unicode Fonts
1032
+ for Windows Computers</a> (Alan Wood)
1033
+ <li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs-fonts.html">Unicode Fonts and
1034
+ Tools for X11</a> (Markus Kuhn)
1035
+ <li><a href="http://www.evertype.com/emono/">Everson Mono</a> (Michael
1036
+ Everson)
1037
+ <li><a href="http://www.monotype.com">Agfa Monotype</a>
1038
+ </ul>
1039
+
1040
+ <p>
1041
+ [ <a href="k95.html">Kermit 95</a> ]
1042
+ [ <a href="glass.html">K95 Screen Shots</a> ]
1043
+ [ <a href="ckermit.html">C-Kermit</a> ]
1044
+ [ <a href="index.html">Kermit Home</a> ]
1045
+ [ <a href="http://www.unicode.org/help/display_problems.html">Display Problems?</a> ]
1046
+ [ <a href="http://www.unicode.org">The Unicode Consortium</a> ]
1047
+ <hr>
1048
+ <ADDRESS>
1049
+ UTF-8 Sampler / <a href="index.html">The Kermit Project</a> /
1050
+ <a href="http://www.columbia.edu">Columbia University</a> /
1051
+ <a href="mailto:kermit@columbia.edu">kermit@columbia.edu</a>
1052
+ </ADDRESS>
1053
+ </body>
1054
+ </html>