wishdev-rio 0.4.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (296) hide show
  1. data/COPYING +341 -0
  2. data/README +81 -0
  3. data/Rakefile +281 -0
  4. data/build_doc.rb +94 -0
  5. data/doc/ANNOUNCE +159 -0
  6. data/doc/RELEASE_NOTES +308 -0
  7. data/doc/RIOIS +215 -0
  8. data/doc/generators/template/html/rio.css +428 -0
  9. data/doc/generators/template/html/rio.rb +523 -0
  10. data/doc/generators/template/html/ugly.rb +132 -0
  11. data/doc/pkg_def.rb +60 -0
  12. data/doc/rfc1738.txt +1403 -0
  13. data/doc/rfc959.txt +3933 -0
  14. data/ex/catcsv.rb +64 -0
  15. data/ex/colx.rb +8 -0
  16. data/ex/findinruby +15 -0
  17. data/ex/findruby +14 -0
  18. data/ex/passwd_report.rb +8 -0
  19. data/ex/prompt.rb +25 -0
  20. data/ex/rgb.txt.gz +0 -0
  21. data/ex/riocat +42 -0
  22. data/ex/riogunzip +31 -0
  23. data/ex/riogzip +24 -0
  24. data/ex/rioprompt.rb +10 -0
  25. data/ex/targz2zip +17 -0
  26. data/ex/tonl +10 -0
  27. data/lib/rio/abstract_method.rb +56 -0
  28. data/lib/rio/argv.rb +56 -0
  29. data/lib/rio/arycopy.rb +43 -0
  30. data/lib/rio/assert.rb +114 -0
  31. data/lib/rio/base.rb +56 -0
  32. data/lib/rio/callstr.rb +46 -0
  33. data/lib/rio/const.rb +51 -0
  34. data/lib/rio/construct.rb +50 -0
  35. data/lib/rio/constructor.rb +258 -0
  36. data/lib/rio/context/autoclose.rb +72 -0
  37. data/lib/rio/context/copying.rb +55 -0
  38. data/lib/rio/context/cxx.rb +66 -0
  39. data/lib/rio/context/dir.rb +120 -0
  40. data/lib/rio/context/gzip.rb +50 -0
  41. data/lib/rio/context/methods.rb +182 -0
  42. data/lib/rio/context/skip.rb +66 -0
  43. data/lib/rio/context/stream.rb +229 -0
  44. data/lib/rio/context.rb +117 -0
  45. data/lib/rio/cp.rb +370 -0
  46. data/lib/rio/def.rb +53 -0
  47. data/lib/rio/dir.rb +144 -0
  48. data/lib/rio/doc/EXAMPLES.rb +299 -0
  49. data/lib/rio/doc/HOWTO.rb +737 -0
  50. data/lib/rio/doc/INDEX.rb +311 -0
  51. data/lib/rio/doc/INTRO.rb +1068 -0
  52. data/lib/rio/doc/OPTIONAL.rb +130 -0
  53. data/lib/rio/doc/SYNOPSIS.rb +183 -0
  54. data/lib/rio/doc.rb +45 -0
  55. data/lib/rio/entrysel.rb +246 -0
  56. data/lib/rio/exception/copy.rb +97 -0
  57. data/lib/rio/exception/notimplemented.rb +57 -0
  58. data/lib/rio/exception/notsupported.rb +46 -0
  59. data/lib/rio/exception/open.rb +61 -0
  60. data/lib/rio/exception/state.rb +73 -0
  61. data/lib/rio/exception.rb +41 -0
  62. data/lib/rio/ext/csv.rb +351 -0
  63. data/lib/rio/ext/if.rb +45 -0
  64. data/lib/rio/ext/mp3info.rb +80 -0
  65. data/lib/rio/ext/splitlines.rb +253 -0
  66. data/lib/rio/ext/yaml/doc.rb +133 -0
  67. data/lib/rio/ext/yaml/tie.rb +149 -0
  68. data/lib/rio/ext/yaml.rb +164 -0
  69. data/lib/rio/ext/zipfile/fs.rb +116 -0
  70. data/lib/rio/ext/zipfile/rl.rb +251 -0
  71. data/lib/rio/ext/zipfile/rootdir.rb +117 -0
  72. data/lib/rio/ext/zipfile/state.rb +161 -0
  73. data/lib/rio/ext/zipfile/wrap.rb +204 -0
  74. data/lib/rio/ext/zipfile.rb +110 -0
  75. data/lib/rio/ext.rb +138 -0
  76. data/lib/rio/factory.rb +436 -0
  77. data/lib/rio/file.rb +118 -0
  78. data/lib/rio/filter/closeoneof.rb +103 -0
  79. data/lib/rio/filter/gzip.rb +70 -0
  80. data/lib/rio/filter.rb +94 -0
  81. data/lib/rio/fs/base.rb +41 -0
  82. data/lib/rio/fs/impl.rb +122 -0
  83. data/lib/rio/fs/native.rb +75 -0
  84. data/lib/rio/fs/stream.rb +61 -0
  85. data/lib/rio/fs/url.rb +63 -0
  86. data/lib/rio/ftp/conncache.rb +101 -0
  87. data/lib/rio/ftp/dir.rb +94 -0
  88. data/lib/rio/ftp/fs.rb +180 -0
  89. data/lib/rio/ftp/ftpfile.rb +20 -0
  90. data/lib/rio/grande.rb +97 -0
  91. data/lib/rio/handle.rb +100 -0
  92. data/lib/rio/if/basic.rb +64 -0
  93. data/lib/rio/if/csv.rb +76 -0
  94. data/lib/rio/if/dir.rb +157 -0
  95. data/lib/rio/if/file.rb +89 -0
  96. data/lib/rio/if/fileordir.rb +268 -0
  97. data/lib/rio/if/grande.rb +729 -0
  98. data/lib/rio/if/grande_entry.rb +379 -0
  99. data/lib/rio/if/grande_stream.rb +693 -0
  100. data/lib/rio/if/internal.rb +125 -0
  101. data/lib/rio/if/path.rb +462 -0
  102. data/lib/rio/if/rubyio.rb +681 -0
  103. data/lib/rio/if/string.rb +83 -0
  104. data/lib/rio/if/temp.rb +45 -0
  105. data/lib/rio/if/test.rb +282 -0
  106. data/lib/rio/if/yaml.rb +206 -0
  107. data/lib/rio/if.rb +64 -0
  108. data/lib/rio/ioh.rb +162 -0
  109. data/lib/rio/iomode.rb +109 -0
  110. data/lib/rio/ios/fail.rb +106 -0
  111. data/lib/rio/ios/generic.rb +119 -0
  112. data/lib/rio/ios/mode.rb +60 -0
  113. data/lib/rio/ios/null.rb +119 -0
  114. data/lib/rio/iowrap.rb +128 -0
  115. data/lib/rio/kernel.rb +54 -0
  116. data/lib/rio/local.rb +62 -0
  117. data/lib/rio/match.rb +53 -0
  118. data/lib/rio/matchrecord.rb +283 -0
  119. data/lib/rio/no_warn.rb +49 -0
  120. data/lib/rio/nullio.rb +159 -0
  121. data/lib/rio/open3.rb +68 -0
  122. data/lib/rio/ops/construct.rb +61 -0
  123. data/lib/rio/ops/create.rb +77 -0
  124. data/lib/rio/ops/dir.rb +346 -0
  125. data/lib/rio/ops/either.rb +134 -0
  126. data/lib/rio/ops/file.rb +102 -0
  127. data/lib/rio/ops/path.rb +296 -0
  128. data/lib/rio/ops/stream/input.rb +267 -0
  129. data/lib/rio/ops/stream/output.rb +100 -0
  130. data/lib/rio/ops/stream/read.rb +86 -0
  131. data/lib/rio/ops/stream/write.rb +57 -0
  132. data/lib/rio/ops/stream.rb +87 -0
  133. data/lib/rio/ops/symlink.rb +80 -0
  134. data/lib/rio/path/reset.rb +69 -0
  135. data/lib/rio/path.rb +129 -0
  136. data/lib/rio/piper/cp.rb +80 -0
  137. data/lib/rio/piper.rb +122 -0
  138. data/lib/rio/prompt.rb +66 -0
  139. data/lib/rio/rectype.rb +88 -0
  140. data/lib/rio/rl/base.rb +118 -0
  141. data/lib/rio/rl/builder.rb +117 -0
  142. data/lib/rio/rl/chmap.rb +66 -0
  143. data/lib/rio/rl/fs2url.rb +82 -0
  144. data/lib/rio/rl/ioi.rb +78 -0
  145. data/lib/rio/rl/path.rb +110 -0
  146. data/lib/rio/rl/pathmethods.rb +116 -0
  147. data/lib/rio/rl/uri.rb +200 -0
  148. data/lib/rio/rl/withpath.rb +296 -0
  149. data/lib/rio/scheme/aryio.rb +88 -0
  150. data/lib/rio/scheme/cmdio.rb +80 -0
  151. data/lib/rio/scheme/cmdpipe.rb +118 -0
  152. data/lib/rio/scheme/fd.rb +65 -0
  153. data/lib/rio/scheme/ftp.rb +141 -0
  154. data/lib/rio/scheme/http.rb +78 -0
  155. data/lib/rio/scheme/null.rb +55 -0
  156. data/lib/rio/scheme/path.rb +98 -0
  157. data/lib/rio/scheme/stderr.rb +55 -0
  158. data/lib/rio/scheme/stdio.rb +71 -0
  159. data/lib/rio/scheme/strio.rb +87 -0
  160. data/lib/rio/scheme/sysio.rb +63 -0
  161. data/lib/rio/scheme/tcp.rb +75 -0
  162. data/lib/rio/scheme/temp.rb +200 -0
  163. data/lib/rio/state/error.rb +72 -0
  164. data/lib/rio/state.rb +242 -0
  165. data/lib/rio/stream/base.rb +54 -0
  166. data/lib/rio/stream/duplex.rb +79 -0
  167. data/lib/rio/stream/open.rb +202 -0
  168. data/lib/rio/stream.rb +181 -0
  169. data/lib/rio/symantics.rb +45 -0
  170. data/lib/rio/tempdir.rb +132 -0
  171. data/lib/rio/to_rio/all.rb +39 -0
  172. data/lib/rio/to_rio/array.rb +39 -0
  173. data/lib/rio/to_rio/io.rb +40 -0
  174. data/lib/rio/to_rio/object.rb +42 -0
  175. data/lib/rio/to_rio/string.rb +40 -0
  176. data/lib/rio/to_rio.rb +67 -0
  177. data/lib/rio/uri/file.rb +198 -0
  178. data/lib/rio/util.rb +48 -0
  179. data/lib/rio/version.rb +51 -0
  180. data/lib/rio.rb +162 -0
  181. data/setup.rb +1360 -0
  182. data/test/bin/count_lines.rb +11 -0
  183. data/test/bin/find_lines.rb +13 -0
  184. data/test/bin/list_dir.rb +14 -0
  185. data/test/ftp/all.rb +9 -0
  186. data/test/ftp/anon_copy_data.rb +36 -0
  187. data/test/ftp/anon_misc.rb +124 -0
  188. data/test/ftp/anon_read.rb +105 -0
  189. data/test/ftp/anon_special.rb +68 -0
  190. data/test/ftp/anon_write.rb +70 -0
  191. data/test/ftp/ftp2ftp.rb +51 -0
  192. data/test/ftp/initftpfiles.rb +14 -0
  193. data/test/ftp/testdef.rb +55 -0
  194. data/test/gem_runtests.rb +15 -0
  195. data/test/http/all.rb +4 -0
  196. data/test/http/copy-from-http.rb +141 -0
  197. data/test/http/uri-meta.rb +72 -0
  198. data/test/lib/temp_server.rb +46 -0
  199. data/test/runalltests.rb +17 -0
  200. data/test/runftptests.rb +14 -0
  201. data/test/runhttp.rb +11 -0
  202. data/test/runhttptests.rb +14 -0
  203. data/test/runtests.rb +52 -0
  204. data/test/tc/abs.rb +355 -0
  205. data/test/tc/all.rb +80 -0
  206. data/test/tc/base.rb +31 -0
  207. data/test/tc/base2.rb +87 -0
  208. data/test/tc/cd1.rb +113 -0
  209. data/test/tc/clearsel.rb +68 -0
  210. data/test/tc/clone.rb +208 -0
  211. data/test/tc/closeoncopy.rb +102 -0
  212. data/test/tc/closeoneof.rb +194 -0
  213. data/test/tc/cmdpipe.rb +149 -0
  214. data/test/tc/copy-dir-samevar.rb +91 -0
  215. data/test/tc/copy-from.rb +129 -0
  216. data/test/tc/copy-to.rb +91 -0
  217. data/test/tc/copy.rb +74 -0
  218. data/test/tc/copyarray.rb +188 -0
  219. data/test/tc/copydest.rb +50 -0
  220. data/test/tc/copydir.rb +166 -0
  221. data/test/tc/copydirlines.rb +121 -0
  222. data/test/tc/copylines.rb +46 -0
  223. data/test/tc/copynonex.rb +118 -0
  224. data/test/tc/copysymlink.rb +39 -0
  225. data/test/tc/create.rb +114 -0
  226. data/test/tc/csv.rb +226 -0
  227. data/test/tc/csv2.rb +138 -0
  228. data/test/tc/csv_columns.rb +37 -0
  229. data/test/tc/csvutil.rb +56 -0
  230. data/test/tc/dir.rb +76 -0
  231. data/test/tc/dir_iter.rb +383 -0
  232. data/test/tc/dirautoclose.rb +67 -0
  233. data/test/tc/dirent.rb +178 -0
  234. data/test/tc/dirss.rb +81 -0
  235. data/test/tc/each.rb +111 -0
  236. data/test/tc/each_break.rb +243 -0
  237. data/test/tc/edf.rb +81 -0
  238. data/test/tc/empty.rb +51 -0
  239. data/test/tc/emptyriodir.rb +129 -0
  240. data/test/tc/entary.rb +227 -0
  241. data/test/tc/entsel.rb +110 -0
  242. data/test/tc/eq.rb +101 -0
  243. data/test/tc/expand_path.rb +69 -0
  244. data/test/tc/ext.rb +136 -0
  245. data/test/tc/fileno.rb +94 -0
  246. data/test/tc/files_select.rb +92 -0
  247. data/test/tc/get.rb +152 -0
  248. data/test/tc/getrec.rb +137 -0
  249. data/test/tc/gzip.rb +109 -0
  250. data/test/tc/io_each_byte.rb +60 -0
  251. data/test/tc/io_read.rb +80 -0
  252. data/test/tc/iometh.rb +149 -0
  253. data/test/tc/likeio.rb +116 -0
  254. data/test/tc/line_record_row.rb +51 -0
  255. data/test/tc/lineno.rb +196 -0
  256. data/test/tc/lines.rb +66 -0
  257. data/test/tc/misc.rb +432 -0
  258. data/test/tc/nolines.rb +204 -0
  259. data/test/tc/noqae.rb +879 -0
  260. data/test/tc/null.rb +45 -0
  261. data/test/tc/once.rb +6 -0
  262. data/test/tc/overload.rb +140 -0
  263. data/test/tc/pa.rb +158 -0
  264. data/test/tc/path_parts.rb +175 -0
  265. data/test/tc/pathop.rb +60 -0
  266. data/test/tc/paths.rb +145 -0
  267. data/test/tc/pid.rb +31 -0
  268. data/test/tc/piper.rb +143 -0
  269. data/test/tc/programs_util.rb +24 -0
  270. data/test/tc/qae.rb +493 -0
  271. data/test/tc/qae_riovar.rb +499 -0
  272. data/test/tc/readline.rb +30 -0
  273. data/test/tc/records.rb +68 -0
  274. data/test/tc/rename.rb +233 -0
  275. data/test/tc/rename_assign.rb +45 -0
  276. data/test/tc/riorl.rb +181 -0
  277. data/test/tc/route.rb +51 -0
  278. data/test/tc/selnosel.rb +33 -0
  279. data/test/tc/skip.rb +89 -0
  280. data/test/tc/skiplines.rb +71 -0
  281. data/test/tc/split.rb +28 -0
  282. data/test/tc/splitlines.rb +65 -0
  283. data/test/tc/splitpath.rb +83 -0
  284. data/test/tc/sub.rb +46 -0
  285. data/test/tc/symlink.rb +176 -0
  286. data/test/tc/symlink0.rb +348 -0
  287. data/test/tc/symlink1.rb +114 -0
  288. data/test/tc/synopsis.rb +75 -0
  289. data/test/tc/temp.rb +152 -0
  290. data/test/tc/tempdir.rb +60 -0
  291. data/test/tc/tempfile.rb +66 -0
  292. data/test/tc/testcase.rb +170 -0
  293. data/test/tc/tonl.rb +37 -0
  294. data/test/tc/truncate.rb +39 -0
  295. data/test/tc/yaml.rb +275 -0
  296. metadata +387 -0
@@ -0,0 +1,132 @@
1
+ # Copyright (c) 2005,2006,2007 Christopher Kleckner
2
+ # All rights reserved
3
+ #
4
+ # This file is part of the Rio library for ruby.
5
+ #
6
+ # Rio is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Rio is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Rio; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #
20
+
21
+
22
+ # This is disgraceful -- a hack required to exert control over how rubygems builds
23
+ # documentation for Rio. My desire to have the command "gem install rio" build the
24
+ # docs correctly overrides my sense of propriety in this case. I apologize to anyone
25
+ # who should have to look at this ugliness.
26
+
27
+ # Begin UGLY
28
+ module Generators #:nodoc: all
29
+ #####################################################################
30
+ #
31
+ # Handle common markup tasks for the various Html classes
32
+ #
33
+
34
+ module MarkUp
35
+
36
+ # Convert a string in markup format into HTML. We keep a cached
37
+ # SimpleMarkup object (and a cached HyperlinkHtml formatter) lying
38
+ # around after the first time we're called per object.
39
+ # Returns '' for a nil/false +str+. With +remove_para+ set, the first <p> at a line start and the first </p> at a line end are removed.
40
+ def markup(str, remove_para=false)
41
+ return '' unless str
42
+ unless defined? @markup
43
+ #p 'RIO MARKUP'
44
+ @markup = SM::SimpleMarkup.new
45
+ # The first add_special call below is disabled; the active one after it also accepts operator method names through meth_name_re.
46
+ # class names, variable names, file names, or instance variables
47
+ # @markup.add_special(/(
48
+ # \b([A-Z]\w*(::\w+)*[.\#]\w+) # A::B.meth
49
+ # | \b([A-Z]\w+(::\w+)*) # A::B..
50
+ # | \#\w+[!?=]? # #meth_name
51
+ # | \b\w+([_\/\.]+\w+)+[!?=]? # meth_name
52
+ # )/x,
53
+ # :CROSSREF)
54
+ meth_name_re = '\w+[!?=]?|<{1,2}|>{1,2}|\[\]|\||\/|\+@?|={2,3}|=~'
55
+ @markup.add_special(/(
56
+ \b([A-Z]\w*(::\w+)*[.\#](#{meth_name_re})) # A::B.meth
57
+ | \b([A-Z]\w+(::\w+)*) # A::B..
58
+ | \#(#{meth_name_re}) # #meth_name
59
+ | \b\w+([_\/\.]+\w+)+[!?=]? # meth_name
60
+ )/x,
61
+ :CROSSREF)
62
+
63
+ # external hyperlinks
64
+ @markup.add_special(/((link:|https?:|mailto:|ftp:|www\.)\S+\w)/, :HYPERLINK)
65
+
66
+ # and links of the form <text>[<url>]
67
+ @markup.add_special(/(((\{.*?\})|\b\S+?)\[\S+?\.\S+?\])/, :TIDYLINK)
68
+ # @markup.add_special(/\b(\S+?\[\S+?\.\S+?\])/, :TIDYLINK)
69
+
70
+ end
71
+ unless defined? @html_formatter
72
+ @html_formatter = HyperlinkHtml.new(self.path, self)
73
+ end
74
+
75
+ # Convert leading comment markers to spaces, but only
76
+ # if all non-blank lines have them
77
+ # The (?>\s*) group is atomic: it cannot give whitespace back, so a blank line never counts as a line lacking a marker.
78
+ if str =~ /^(?>\s*)[^\#]/
79
+ content = str
80
+ else
81
+ content = str.gsub(/^\s*(#+)/) { $1.tr('#',' ') }
82
+ end
83
+
84
+ res = @markup.convert(content, @html_formatter)
85
+ if remove_para
86
+ res.sub!(/^<p>/, '')
87
+ res.sub!(/<\/p>$/, '')
88
+ end
89
+ res
90
+ end
91
+ end
92
+ end
93
+ module Generators
94
+ class HyperlinkHtml < SM::ToHtml # SM::ToHtml subclass that overrides how CROSSREF specials are rendered
95
+ def handle_special_CROSSREF(special) # returns either an <a href> link or the plain (possibly trimmed) reference text
96
+ #p 'handle_special_CROSSREF'
97
+ name = special.text
98
+ if name[0,1] == '#'
99
+ lookup = name[1..-1]
100
+ name = lookup unless Options.instance.show_hash
101
+ else
102
+ lookup = name
103
+ end
104
+ # "Container.meth" / "Container#meth" text is looked up as (container, method); anything else is looked up as a single symbol.
105
+ if /([A-Z].*)[.\#](.*)/ =~ lookup
106
+ container = $1
107
+ method = $2
108
+ ref = @context.find_symbol(container, method)
109
+ else
110
+ ref = @context.find_symbol(lookup)
111
+ end
112
+ # Link only when a symbol was found and its document_self is set. NOTE(review): sub! edits in place, so it may modify the string returned by special.text -- confirm that is intended.
113
+ if ref and ref.document_self
114
+ #print "#{name} =>"
115
+ #name.sub!(/^(RIO::)?IF::.+\#/,'Rio#')
116
+ name.sub!(/^(RIO::)?IF::.+\#/,'')
117
+ #name.sub!(/^#/,'Rio#')
118
+ name.sub!(/^#/,'')
119
+ #puts " #{name}"
120
+ if %w[Rio Grande String].include?(name) or name =~ /^(Dir)/
121
+ #if %w[Rio Grande String].include?(name)
122
+ name
123
+ else
124
+ "<a href=\"#{ref.as_href(@from_path)}\">#{name}</a>"
125
+ end
126
+ else
127
+ name
128
+ end
129
+ end
130
+ end
131
+ end
132
+ # End UGLY
data/doc/pkg_def.rb ADDED
@@ -0,0 +1,60 @@
1
+ #--
2
+ # ===============================================================================
3
+ # Copyright (c) 2005,2006,2007 Christopher Kleckner
4
+ # All rights reserved
5
+ #
6
+ # This file is part of the Rio library for ruby.
7
+ #
8
+ # Rio is free software; you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # Rio is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with Rio; if not, write to the Free Software
20
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
+ # ===============================================================================
22
+ #
23
+ # To create the documentation for Rio run the command
24
+ # ruby build_doc.rb
25
+ # from the distribution directory.
26
+ #++
27
+
28
+ require 'rio/doc'
29
+
30
+ module PKG # Package metadata (name, version, author, URLs), file lists and output file names for Rio
31
+ NAME = "rio"
32
+ TITLE = RIO::TITLE
33
+ VERSION = RIO::VERSION
34
+ FULLNAME = PKG::NAME + "-" + PKG::VERSION
35
+ SUMMARY = RIO::SUMMARY
36
+ DESCRIPTION = RIO::DESCRIPTION
37
+ AUTHOR = "Christopher Kleckner"
38
+ EMAIL = "rio4ruby@rubyforge.org"
39
+ RUBYFORGE_PROJECT = PKG::NAME
40
+ HOMEPAGE = "http://#{PKG::RUBYFORGE_PROJECT}.rubyforge.org/"
41
+ RUBYFORGE_URL = "http://rubyforge.org/projects/#{PKG::RUBYFORGE_PROJECT}"
42
+ RDOC_OPTIONS = ['--show-hash','--line-numbers','-mRIO::Doc::SYNOPSIS','-Tdoc/generators/template/html/rio.rb']
43
+ module FILES
44
+ SRC = rio('lib').norecurse('.svn').files['*.rb']
45
+ DOC = rio['README'] + rio('lib')['rio.rb'] + rio('lib/rio/doc/')['*.rb'] +
46
+ rio('lib/rio/if/')['*.rb'] + rio('lib/rio')['kernel.rb','constructor.rb']
47
+ XMP = rio('ex').entries[]
48
+ D2 = rio('doc').norecurse('.svn').all.files.skip.dirs['rdoc','.svn']
49
+ TST = rio('test').norecurse('.svn').all.files('*.rb').skip.dirs['qp','.svn']
50
+ MSC = rio.files['setup.rb', 'build_doc.rb', 'COPYING', 'Rakefile', 'ChangeLog', 'VERSION']
51
+ # Convert every entry of each list to a String in place (map!), so DIST below is a concatenation of string lists.
52
+ [SRC,DOC,XMP,D2,TST,MSC].each do |fary|
53
+ fary.map! { |f| f.to_s }
54
+ end
55
+ DIST = SRC + DOC + XMP + D2 + TST + MSC
56
+ end
57
+ # Output names are built as OUT_DIR/FULLNAME plus each of the extensions .gem, .tar.gz and .zip.
58
+ OUT_DIR = 'pkg'
59
+ OUT_FILES = %w[.gem .tar.gz .zip].map { |ex| PKG::OUT_DIR + '/' + FULLNAME + ex }
60
+ end
data/doc/rfc1738.txt ADDED
@@ -0,0 +1,1403 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+ Network Working Group T. Berners-Lee
8
+ Request for Comments: 1738 CERN
9
+ Category: Standards Track L. Masinter
10
+ Xerox Corporation
11
+ M. McCahill
12
+ University of Minnesota
13
+ Editors
14
+ December 1994
15
+
16
+
17
+ Uniform Resource Locators (URL)
18
+
19
+ Status of this Memo
20
+
21
+ This document specifies an Internet standards track protocol for the
22
+ Internet community, and requests discussion and suggestions for
23
+ improvements. Please refer to the current edition of the "Internet
24
+ Official Protocol Standards" (STD 1) for the standardization state
25
+ and status of this protocol. Distribution of this memo is unlimited.
26
+
27
+ Abstract
28
+
29
+ This document specifies a Uniform Resource Locator (URL), the syntax
30
+ and semantics of formalized information for location and access of
31
+ resources via the Internet.
32
+
33
+ 1. Introduction
34
+
35
+ This document describes the syntax and semantics for a compact string
36
+ representation for a resource available via the Internet. These
37
+ strings are called "Uniform Resource Locators" (URLs).
38
+
39
+ The specification is derived from concepts introduced by the World-
40
+ Wide Web global information initiative, whose use of such objects
41
+ dates from 1990 and is described in "Universal Resource Identifiers
42
+ in WWW", RFC 1630. The specification of URLs is designed to meet the
43
+ requirements laid out in "Functional Requirements for Internet
44
+ Resource Locators" [12].
45
+
46
+ This document was written by the URI working group of the Internet
47
+ Engineering Task Force. Comments may be addressed to the editors, or
48
+ to the URI-WG <uri@bunyip.com>. Discussions of the group are archived
49
+ at <URL:http://www.acl.lanl.gov/URI/archive/uri-archive.index.html>
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+ Berners-Lee, Masinter & McCahill [Page 1]
59
+
60
+ RFC 1738 Uniform Resource Locators (URL) December 1994
61
+
62
+
63
+ 2. General URL Syntax
64
+
65
+ Just as there are many different methods of access to resources,
66
+ there are several schemes for describing the location of such
67
+ resources.
68
+
69
+ The generic syntax for URLs provides a framework for new schemes to
70
+ be established using protocols other than those defined in this
71
+ document.
72
+
73
+ URLs are used to `locate' resources, by providing an abstract
74
+ identification of the resource location. Having located a resource,
75
+ a system may perform a variety of operations on the resource, as
76
+ might be characterized by such words as `access', `update',
77
+ `replace', `find attributes'. In general, only the `access' method
78
+ needs to be specified for any URL scheme.
79
+
80
+ 2.1. The main parts of URLs
81
+
82
+ A full BNF description of the URL syntax is given in Section 5.
83
+
84
+ In general, URLs are written as follows:
85
+
86
+ <scheme>:<scheme-specific-part>
87
+
88
+ A URL contains the name of the scheme being used (<scheme>) followed
89
+ by a colon and then a string (the <scheme-specific-part>) whose
90
+ interpretation depends on the scheme.
91
+
92
+ Scheme names consist of a sequence of characters. The lower case
93
+ letters "a"--"z", digits, and the characters plus ("+"), period
94
+ ("."), and hyphen ("-") are allowed. For resiliency, programs
95
+ interpreting URLs should treat upper case letters as equivalent to
96
+ lower case in scheme names (e.g., allow "HTTP" as well as "http").
97
+
98
+ 2.2. URL Character Encoding Issues
99
+
100
+ URLs are sequences of characters, i.e., letters, digits, and special
101
+ characters. A URL may be represented in a variety of ways: e.g., ink
102
+ on paper, or a sequence of octets in a coded character set. The
103
+ interpretation of a URL depends only on the identity of the
104
+ characters used.
105
+
106
+ In most URL schemes, the sequences of characters in different parts
107
+ of a URL are used to represent sequences of octets used in Internet
108
+ protocols. For example, in the ftp scheme, the host name, directory
109
+ name and file names are such sequences of octets, represented by
110
+ parts of the URL. Within those parts, an octet may be represented by
111
+
112
+
113
+
114
+ Berners-Lee, Masinter & McCahill [Page 2]
115
+
116
+ RFC 1738 Uniform Resource Locators (URL) December 1994
117
+
118
+
119
+ the character which has that octet as its code within the US-ASCII
120
+ [20] coded character set.
121
+
122
+ In addition, octets may be encoded by a character triplet consisting
123
+ of the character "%" followed by the two hexadecimal digits (from
124
+ "0123456789ABCDEF") which form the hexadecimal value of the octet.
125
+ (The characters "abcdef" may also be used in hexadecimal encodings.)
126
+
127
+ Octets must be encoded if they have no corresponding graphic
128
+ character within the US-ASCII coded character set, if the use of the
129
+ corresponding character is unsafe, or if the corresponding character
130
+ is reserved for some other interpretation within the particular URL
131
+ scheme.
132
+
133
+ No corresponding graphic US-ASCII:
134
+
135
+ URLs are written only with the graphic printable characters of the
136
+ US-ASCII coded character set. The octets 80-FF hexadecimal are not
137
+ used in US-ASCII, and the octets 00-1F and 7F hexadecimal represent
138
+ control characters; these must be encoded.
139
+
140
+ Unsafe:
141
+
142
+ Characters can be unsafe for a number of reasons. The space
143
+ character is unsafe because significant spaces may disappear and
144
+ insignificant spaces may be introduced when URLs are transcribed or
145
+ typeset or subjected to the treatment of word-processing programs.
146
+ The characters "<" and ">" are unsafe because they are used as the
147
+ delimiters around URLs in free text; the quote mark (""") is used to
148
+ delimit URLs in some systems. The character "#" is unsafe and should
149
+ always be encoded because it is used in World Wide Web and in other
150
+ systems to delimit a URL from a fragment/anchor identifier that might
151
+ follow it. The character "%" is unsafe because it is used for
152
+ encodings of other characters. Other characters are unsafe because
153
+ gateways and other transport agents are known to sometimes modify
154
+ such characters. These characters are "{", "}", "|", "\", "^", "~",
155
+ "[", "]", and "`".
156
+
157
+ All unsafe characters must always be encoded within a URL. For
158
+ example, the character "#" must be encoded within URLs even in
159
+ systems that do not normally deal with fragment or anchor
160
+ identifiers, so that if the URL is copied into another system that
161
+ does use them, it will not be necessary to change the URL encoding.
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+ Berners-Lee, Masinter & McCahill [Page 3]
171
+
172
+ RFC 1738 Uniform Resource Locators (URL) December 1994
173
+
174
+
175
+ Reserved:
176
+
177
+ Many URL schemes reserve certain characters for a special meaning:
178
+ their appearance in the scheme-specific part of the URL has a
179
+ designated semantics. If the character corresponding to an octet is
180
+ reserved in a scheme, the octet must be encoded. The characters ";",
181
+ "/", "?", ":", "@", "=" and "&" are the characters which may be
182
+ reserved for special meaning within a scheme. No other characters may
183
+ be reserved within a scheme.
184
+
185
+ Usually a URL has the same interpretation when an octet is
186
+ represented by a character and when it is encoded. However, this is not
187
+ true for reserved characters: encoding a character reserved for a
188
+ particular scheme may change the semantics of a URL.
189
+
190
+ Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
191
+ reserved characters used for their reserved purposes may be used
192
+ unencoded within a URL.
193
+
194
+ On the other hand, characters that are not required to be encoded
195
+ (including alphanumerics) may be encoded within the scheme-specific
196
+ part of a URL, as long as they are not being used for a reserved
197
+ purpose.
198
+
199
+ 2.3 Hierarchical schemes and relative links
200
+
201
+ In some cases, URLs are used to locate resources that contain
202
+ pointers to other resources. In some cases, those pointers are
203
+ represented as relative links where the expression of the location of
204
+ the second resource is in terms of "in the same place as this one
205
+ except with the following relative path". Relative links are not
206
+ described in this document. However, the use of relative links
207
+ depends on the original URL containing a hierarchical structure
208
+ against which the relative link is based.
209
+
210
+ Some URL schemes (such as the ftp, http, and file schemes) contain
211
+ names that can be considered hierarchical; the components of the
212
+ hierarchy are separated by "/".
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ Berners-Lee, Masinter & McCahill [Page 4]
227
+
228
+ RFC 1738 Uniform Resource Locators (URL) December 1994
229
+
230
+
231
+ 3. Specific Schemes
232
+
233
+ The mapping for some existing standard and experimental protocols is
234
+ outlined in the BNF syntax definition. Notes on particular protocols
235
+ follow. The schemes covered are:
236
+
237
+ ftp File Transfer protocol
238
+ http Hypertext Transfer Protocol
239
+ gopher The Gopher protocol
240
+ mailto Electronic mail address
241
+ news USENET news
242
+ nntp USENET news using NNTP access
243
+ telnet Reference to interactive sessions
244
+ wais Wide Area Information Servers
245
+ file Host-specific file names
246
+ prospero Prospero Directory Service
247
+
248
+ Other schemes may be specified by future specifications. Section 4 of
249
+ this document describes how new schemes may be registered, and lists
250
+ some scheme names that are under development.
251
+
252
+ 3.1. Common Internet Scheme Syntax
253
+
254
+ While the syntax for the rest of the URL may vary depending on the
255
+ particular scheme selected, URL schemes that involve the direct use
256
+ of an IP-based protocol to a specified host on the Internet use a
257
+ common syntax for the scheme-specific data:
258
+
259
+ //<user>:<password>@<host>:<port>/<url-path>
260
+
261
+ Some or all of the parts "<user>:<password>@", ":<password>",
262
+ ":<port>", and "/<url-path>" may be excluded. The scheme specific
263
+ data start with a double slash "//" to indicate that it complies with
264
+ the common Internet scheme syntax. The different components obey the
265
+ following rules:
266
+
267
+ user
268
+ An optional user name. Some schemes (e.g., ftp) allow the
269
+ specification of a user name.
270
+
271
+ password
272
+ An optional password. If present, it follows the user
273
+ name separated from it by a colon.
274
+
275
+ The user name (and password), if present, are followed by a
276
+ commercial at-sign "@". Within the user and password field, any ":",
277
+ "@", or "/" must be encoded.
278
+
279
+
280
+
281
+
282
+ Berners-Lee, Masinter & McCahill [Page 5]
283
+
284
+ RFC 1738 Uniform Resource Locators (URL) December 1994
285
+
286
+
287
+ Note that an empty user name or password is different than no user
288
+ name or password; there is no way to specify a password without
289
+ specifying a user name. E.g., <URL:ftp://@host.com/> has an empty
290
+ user name and no password, <URL:ftp://host.com/> has no user name,
291
+ while <URL:ftp://foo:@host.com/> has a user name of "foo" and an
292
+ empty password.
293
+
294
+ host
295
+ The fully qualified domain name of a network host, or its IP
296
+ address as a set of four decimal digit groups separated by
297
+ ".". Fully qualified domain names take the form as described
298
+ in Section 3.5 of RFC 1034 [13] and Section 2.1 of RFC 1123
299
+ [5]: a sequence of domain labels separated by ".", each domain
300
+ label starting and ending with an alphanumerical character and
301
+ possibly also containing "-" characters. The rightmost domain
302
+ label will never start with a digit, though, which
303
+ syntactically distinguishes all domain names from the IP
304
+ addresses.
305
+
306
+ port
307
+ The port number to connect to. Most schemes designate
308
+ protocols that have a default port number. Another port number
309
+ may optionally be supplied, in decimal, separated from the
310
+ host by a colon. If the port is omitted, the colon is as well.
311
+
312
+ url-path
313
+ The rest of the locator consists of data specific to the
314
+ scheme, and is known as the "url-path". It supplies the
315
+ details of how the specified resource can be accessed. Note
316
+ that the "/" between the host (or port) and the url-path is
317
+ NOT part of the url-path.
318
+
319
+ The url-path syntax depends on the scheme being used, as does the
320
+ manner in which it is interpreted.
321
+
322
+ 3.2. FTP
323
+
324
+ The FTP URL scheme is used to designate files and directories on
325
+ Internet hosts accessible using the FTP protocol (RFC959).
326
+
327
+ A FTP URL follows the syntax described in Section 3.1. If :<port> is
328
+ omitted, the port defaults to 21.
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+ Berners-Lee, Masinter & McCahill [Page 6]
339
+
340
+ RFC 1738 Uniform Resource Locators (URL) December 1994
341
+
342
+
343
+ 3.2.1. FTP Name and Password
344
+
345
+ A user name and password may be supplied; they are used in the ftp
346
+ "USER" and "PASS" commands after first making the connection to the
347
+ FTP server. If no user name or password is supplied and one is
348
+ requested by the FTP server, the conventions for "anonymous" FTP are
349
+ to be used, as follows:
350
+
351
+ The user name "anonymous" is supplied.
352
+
353
+ The password is supplied as the Internet e-mail address
354
+ of the end user accessing the resource.
355
+
356
+ If the URL supplies a user name but no password, and the remote
357
+ server requests a password, the program interpreting the FTP URL
358
+ should request one from the user.
359
+
360
+ 3.2.2. FTP url-path
361
+
362
+ The url-path of a FTP URL has the following syntax:
363
+
364
+ <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
365
+
366
+ Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
367
+ and <typecode> is one of the characters "a", "i", or "d". The part
368
+ ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
369
+ empty. The whole url-path may be omitted, including the "/"
370
+ delimiting it from the prefix containing user, password, host, and
371
+ port.
372
+
373
+ The url-path is interpreted as a series of FTP commands as follows:
374
+
375
+ Each of the <cwd> elements is to be supplied, sequentially, as the
376
+ argument to a CWD (change working directory) command.
377
+
378
+ If the typecode is "d", perform a NLST (name list) command with
379
+ <name> as the argument, and interpret the results as a file
380
+ directory listing.
381
+
382
+ Otherwise, perform a TYPE command with <typecode> as the argument,
383
+ and then access the file whose name is <name> (for example, using
384
+ the RETR command.)
385
+
386
+ Within a name or CWD component, the characters "/" and ";" are
387
+ reserved and must be encoded. The components are decoded prior to
388
+ their use in the FTP protocol. In particular, if the appropriate FTP
389
+ sequence to access a particular file requires supplying a string
390
+ containing a "/" as an argument to a CWD or RETR command, it is
391
+
392
+
393
+
394
+ Berners-Lee, Masinter & McCahill [Page 7]
395
+
396
+ RFC 1738 Uniform Resource Locators (URL) December 1994
397
+
398
+
399
+ necessary to encode each "/".
400
+
401
+ For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
402
+ interpreted by FTP-ing to "host.dom", logging in as "myname"
403
+ (prompting for a password if it is asked for), and then executing
404
+ "CWD /etc" and then "RETR motd". This has a different meaning from
405
+ <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
406
+ "RETR motd"; the initial "CWD" might be executed relative to the
407
+ default directory for "myname". On the other hand,
408
+ <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
409
+ argument, then "CWD etc", and then "RETR motd".
410
+
411
+ FTP URLs may also be used for other operations; for example, it is
412
+ possible to update a file on a remote file server, or infer
413
+ information about it from the directory listings. The mechanism for
414
+ doing so is not spelled out here.
415
+
416
+ 3.2.3. FTP Typecode is Optional
417
+
418
+ The entire ;type=<typecode> part of a FTP URL is optional. If it is
419
+ omitted, the client program interpreting the URL must guess the
420
+ appropriate mode to use. In general, the data content type of a file
421
+ can only be guessed from the name, e.g., from the suffix of the name;
422
+ the appropriate type code to be used for transfer of the file can
423
+ then be deduced from the data content of the file.
424
+
425
+ 3.2.4 Hierarchy
426
+
427
+ For some file systems, the "/" used to denote the hierarchical
428
+ structure of the URL corresponds to the delimiter used to construct a
429
+ file name hierarchy, and thus, the filename will look similar to the
430
+ URL path. This does NOT mean that the URL is a Unix filename.
431
+
432
+ 3.2.5. Optimization
433
+
434
+ Clients accessing resources via FTP may employ additional heuristics
435
+ to optimize the interaction. For some FTP servers, for example, it
436
+ may be reasonable to keep the control connection open while accessing
437
+ multiple URLs from the same server. However, there is no common
438
+ hierarchical model to the FTP protocol, so if a directory change
439
+ command has been given, it is impossible in general to deduce what
440
+ sequence should be given to navigate to another directory for a
441
+ second retrieval, if the paths are different. The only reliable
442
+ algorithm is to disconnect and reestablish the control connection.
443
+
444
+
445
+
446
+
447
+
448
+
449
+
450
+ Berners-Lee, Masinter & McCahill [Page 8]
451
+
452
+ RFC 1738 Uniform Resource Locators (URL) December 1994
453
+
454
+
455
+ 3.3. HTTP
456
+
457
+ The HTTP URL scheme is used to designate Internet resources
458
+ accessible using HTTP (HyperText Transfer Protocol).
459
+
460
+ The HTTP protocol is specified elsewhere. This specification only
461
+ describes the syntax of HTTP URLs.
462
+
463
+ An HTTP URL takes the form:
464
+
465
+ http://<host>:<port>/<path>?<searchpart>
466
+
467
+ where <host> and <port> are as described in Section 3.1. If :<port>
468
+ is omitted, the port defaults to 80. No user name or password is
469
+ allowed. <path> is an HTTP selector, and <searchpart> is a query
470
+ string. The <path> is optional, as is the <searchpart> and its
471
+ preceding "?". If neither <path> nor <searchpart> is present, the "/"
472
+ may also be omitted.
473
+
474
+ Within the <path> and <searchpart> components, "/", ";", "?" are
475
+ reserved. The "/" character may be used within HTTP to designate a
476
+ hierarchical structure.
477
+
478
+ 3.4. GOPHER
479
+
480
+ The Gopher URL scheme is used to designate Internet resources
481
+ accessible using the Gopher protocol.
482
+
483
+ The base Gopher protocol is described in RFC 1436 and supports items
484
+ and collections of items (directories). The Gopher+ protocol is a set
485
+ of upward compatible extensions to the base Gopher protocol and is
486
+ described in [2]. Gopher+ supports associating arbitrary sets of
487
+ attributes and alternate data representations with Gopher items.
488
+ Gopher URLs accommodate both Gopher and Gopher+ items and item
489
+ attributes.
490
+
491
+ 3.4.1. Gopher URL syntax
492
+
493
+ A Gopher URL takes the form:
494
+
495
+ gopher://<host>:<port>/<gopher-path>
496
+
497
+ where <gopher-path> is one of
498
+
499
+ <gophertype><selector>
500
+ <gophertype><selector>%09<search>
501
+ <gophertype><selector>%09<search>%09<gopher+_string>
502
+
503
+
504
+
505
+
506
+ Berners-Lee, Masinter & McCahill [Page 9]
507
+
508
+ RFC 1738 Uniform Resource Locators (URL) December 1994
509
+
510
+
511
+ If :<port> is omitted, the port defaults to 70. <gophertype> is a
512
+ single-character field to denote the Gopher type of the resource to
513
+ which the URL refers. The entire <gopher-path> may also be empty, in
514
+ which case the delimiting "/" is also optional and the <gophertype>
515
+ defaults to "1".
516
+
517
+ <selector> is the Gopher selector string. In the Gopher protocol,
518
+ Gopher selector strings are a sequence of octets which may contain
519
+ any octets except 09 hexadecimal (US-ASCII HT or tab), 0A hexadecimal
520
+ (US-ASCII character LF), and 0D (US-ASCII character CR).
521
+
522
+ Gopher clients specify which item to retrieve by sending the Gopher
523
+ selector string to a Gopher server.
524
+
525
+ Within the <gopher-path>, no characters are reserved.
526
+
527
+ Note that some Gopher <selector> strings begin with a copy of the
528
+ <gophertype> character, in which case that character will occur twice
529
+ consecutively. The Gopher selector string may be an empty string;
530
+ this is how Gopher clients refer to the top-level directory on a
531
+ Gopher server.
532
+
533
+ 3.4.2 Specifying URLs for Gopher Search Engines
534
+
535
+ If the URL refers to a search to be submitted to a Gopher search
536
+ engine, the selector is followed by an encoded tab (%09) and the
537
+ search string. To submit a search to a Gopher search engine, the
538
+ Gopher client sends the <selector> string (after decoding), a tab,
539
+ and the search string to the Gopher server.
540
+
541
+ 3.4.3 URL syntax for Gopher+ items
542
+
543
+ URLs for Gopher+ items have a second encoded tab (%09) and a Gopher+
544
+ string. Note that in this case, the %09<search> string must be
545
+ supplied, although the <search> element may be the empty string.
546
+
547
+ The <gopher+_string> is used to represent information required for
548
+ retrieval of the Gopher+ item. Gopher+ items may have alternate
549
+ views, arbitrary sets of attributes, and may have electronic forms
550
+ associated with them.
551
+
552
+ To retrieve the data associated with a Gopher+ URL, a client will
553
+ connect to the server and send the Gopher selector, followed by a tab
554
+ and the search string (which may be empty), followed by a tab and the
555
+ Gopher+ commands.
556
+
557
+
558
+
559
+
560
+
561
+
562
+ Berners-Lee, Masinter & McCahill [Page 10]
563
+
564
+ RFC 1738 Uniform Resource Locators (URL) December 1994
565
+
566
+
567
+ 3.4.4 Default Gopher+ data representation
568
+
569
+ When a Gopher server returns a directory listing to a client, the
570
+ Gopher+ items are tagged with either a "+" (denoting Gopher+ items)
571
+ or a "?" (denoting Gopher+ items which have a +ASK form associated
572
+ with them). A Gopher URL with a Gopher+ string consisting of only a
573
+ "+" refers to the default view (data representation) of the item
574
+ while a Gopher+ string containing only a "?" refers to an item with a
575
+ Gopher electronic form associated with it.
576
+
577
+ 3.4.5 Gopher+ items with electronic forms
578
+
579
+ Gopher+ items which have a +ASK associated with them (i.e. Gopher+
580
+ items tagged with a "?") require the client to fetch the item's +ASK
581
+ attribute to get the form definition, and then ask the user to fill
582
+ out the form and return the user's responses along with the selector
583
+ string to retrieve the item. Gopher+ clients know how to do this but
584
+ depend on the "?" tag in the Gopher+ item description to know when to
585
+ handle this case. The "?" is used in the Gopher+ string to be
586
+ consistent with Gopher+ protocol's use of this symbol.
587
+
588
+ 3.4.6 Gopher+ item attribute collections
589
+
590
+ To refer to the Gopher+ attributes of an item, the Gopher URL's
591
+ Gopher+ string consists of "!" or "$". "!" refers to all of a
592
+ Gopher+ item's attributes. "$" refers to all the item attributes for
593
+ all items in a Gopher directory.
594
+
595
+ 3.4.7 Referring to specific Gopher+ attributes
596
+
597
+ To refer to specific attributes, the URL's gopher+_string is
598
+ "!<attribute_name>" or "$<attribute_name>". For example, to refer to
599
+ the attribute containing the abstract of an item, the gopher+_string
600
+ would be "!+ABSTRACT".
601
+
602
+ To refer to several attributes, the gopher+_string consists of the
603
+ attribute names separated by coded spaces. For example,
604
+ "!+ABSTRACT%20+SMELL" refers to the +ABSTRACT and +SMELL attributes
605
+ of an item.
606
+
607
+ 3.4.8 URL syntax for Gopher+ alternate views
608
+
609
+ Gopher+ allows for optional alternate data representations (alternate
610
+ views) of items. To retrieve a Gopher+ alternate view, a Gopher+
611
+ client sends the appropriate view and language identifier (found in
612
+ the item's +VIEW attribute). To refer to a specific Gopher+ alternate
613
+ view, the URL's Gopher+ string would be in the form:
614
+
615
+
616
+
617
+
618
+ Berners-Lee, Masinter & McCahill [Page 11]
619
+
620
+ RFC 1738 Uniform Resource Locators (URL) December 1994
621
+
622
+
623
+ +<view_name>%20<language_name>
624
+
625
+ For example, a Gopher+ string of "+application/postscript%20Es_ES"
626
+ refers to the Spanish language postscript alternate view of a Gopher+
627
+ item.
628
+
629
+ 3.4.9 URL syntax for Gopher+ electronic forms
630
+
631
+ The gopher+_string for a URL that refers to an item referenced by a
632
+ Gopher+ electronic form (an ASK block) filled out with specific
633
+ values is a coded version of what the client sends to the server.
634
+ The gopher+_string is of the form:
635
+
636
+ +%091%0D%0A+-1%0D%0A<ask_item1_value>%0D%0A<ask_item2_value>%0D%0A.%0D%0A
637
+
638
+ To retrieve this item, the Gopher client sends:
639
+
640
+ <a_gopher_selector><tab>+<tab>1<cr><lf>
641
+ +-1<cr><lf>
642
+ <ask_item1_value><cr><lf>
643
+ <ask_item2_value><cr><lf>
644
+ .<cr><lf>
645
+
646
+ to the Gopher server.
647
+
648
+ 3.5. MAILTO
649
+
650
+ The mailto URL scheme is used to designate the Internet mailing
651
+ address of an individual or service. No additional information other
652
+ than an Internet mailing address is present or implied.
653
+
654
+ A mailto URL takes the form:
655
+
656
+ mailto:<rfc822-addr-spec>
657
+
658
+ where <rfc822-addr-spec> is (the encoding of an) addr-spec, as
659
+ specified in RFC 822 [6]. Within mailto URLs, there are no reserved
660
+ characters.
661
+
662
+ Note that the percent sign ("%") is commonly used within RFC 822
663
+ addresses and must be encoded.
664
+
665
+ Unlike many URLs, the mailto scheme does not represent a data object
666
+ to be accessed directly; there is no sense in which it designates an
667
+ object. It has a different use than the message/external-body type in
668
+ MIME.
669
+
670
+
671
+
672
+
673
+
674
+ Berners-Lee, Masinter & McCahill [Page 12]
675
+
676
+ RFC 1738 Uniform Resource Locators (URL) December 1994
677
+
678
+
679
+ 3.6. NEWS
680
+
681
+ The news URL scheme is used to refer to either news groups or
682
+ individual articles of USENET news, as specified in RFC 1036.
683
+
684
+ A news URL takes one of two forms:
685
+
686
+ news:<newsgroup-name>
687
+ news:<message-id>
688
+
689
+ A <newsgroup-name> is a period-delimited hierarchical name, such as
690
+ "comp.infosystems.www.misc". A <message-id> corresponds to the
691
+ Message-ID of section 2.1.5 of RFC 1036, without the enclosing "<"
692
+ and ">"; it takes the form <unique>@<full_domain_name>. A message
693
+ identifier may be distinguished from a news group name by the
694
+ presence of the commercial at "@" character. No additional characters
695
+ are reserved within the components of a news URL.
696
+
697
+ If <newsgroup-name> is "*" (as in <URL:news:*>), it is used to refer
698
+ to "all available news groups".
699
+
700
+ The news URLs are unusual in that by themselves, they do not contain
701
+ sufficient information to locate a single resource, but, rather, are
702
+ location-independent.
703
+
704
+ 3.7. NNTP
705
+
706
+ The nntp URL scheme is an alternative method of referencing news
707
+ articles, useful for specifying news articles from NNTP servers (RFC
708
+ 977).
709
+
710
+ A nntp URL takes the form:
711
+
712
+ nntp://<host>:<port>/<newsgroup-name>/<article-number>
713
+
714
+ where <host> and <port> are as described in Section 3.1. If :<port>
715
+ is omitted, the port defaults to 119.
716
+
717
+ The <newsgroup-name> is the name of the group, while the <article-
718
+ number> is the numeric id of the article within that newsgroup.
719
+
720
+ Note that while nntp: URLs specify a unique location for the article
721
+ resource, most NNTP servers currently on the Internet today are
722
+ configured only to allow access from local clients, and thus nntp
723
+ URLs do not designate globally accessible resources. Thus, the news:
724
+ form of URL is preferred as a way of identifying news articles.
725
+
726
+
727
+
728
+
729
+
730
+ Berners-Lee, Masinter & McCahill [Page 13]
731
+
732
+ RFC 1738 Uniform Resource Locators (URL) December 1994
733
+
734
+
735
+ 3.8. TELNET
736
+
737
+ The Telnet URL scheme is used to designate interactive services that
738
+ may be accessed by the Telnet protocol.
739
+
740
+ A telnet URL takes the form:
741
+
742
+ telnet://<user>:<password>@<host>:<port>/
743
+
744
+ as specified in Section 3.1. The final "/" character may be omitted.
745
+ If :<port> is omitted, the port defaults to 23. The :<password> can
746
+ be omitted, as well as the whole <user>:<password> part.
747
+
748
+ This URL does not designate a data object, but rather an interactive
749
+ service. Remote interactive services vary widely in the means by
750
+ which they allow remote logins; in practice, the <user> and
751
+ <password> supplied are advisory only: clients accessing a telnet URL
752
+ merely advise the user of the suggested username and password.
753
+
754
+ 3.9. WAIS
755
+
756
+ The WAIS URL scheme is used to designate WAIS databases, searches, or
757
+ individual documents available from a WAIS database. WAIS is
758
+ described in [7]. The WAIS protocol is described in RFC 1625 [17].
759
+ Although the WAIS protocol is based on Z39.50-1988, the WAIS URL
760
+ scheme is not intended for use with arbitrary Z39.50 services.
761
+
762
+ A WAIS URL takes one of the following forms:
763
+
764
+ wais://<host>:<port>/<database>
765
+ wais://<host>:<port>/<database>?<search>
766
+ wais://<host>:<port>/<database>/<wtype>/<wpath>
767
+
768
+ where <host> and <port> are as described in Section 3.1. If :<port>
769
+ is omitted, the port defaults to 210. The first form designates a
770
+ WAIS database that is available for searching. The second form
771
+ designates a particular search. <database> is the name of the WAIS
772
+ database being queried.
773
+
774
+ The third form designates a particular document within a WAIS
775
+ database to be retrieved. In this form <wtype> is the WAIS
776
+ designation of the type of the object. Many WAIS implementations
777
+ require that a client know the "type" of an object prior to
778
+ retrieval, the type being returned along with the internal object
779
+ identifier in the search response. The <wtype> is included in the
780
+ URL in order to allow the client interpreting the URL adequate
781
+ information to actually retrieve the document.
782
+
783
+
784
+
785
+
786
+ Berners-Lee, Masinter & McCahill [Page 14]
787
+
788
+ RFC 1738 Uniform Resource Locators (URL) December 1994
789
+
790
+
791
+ The <wpath> of a WAIS URL consists of the WAIS document-id, encoded
792
+ as necessary using the method described in Section 2.2. The WAIS
793
+ document-id should be treated opaquely; it may only be decomposed by
794
+ the server that issued it.
795
+
796
+ 3.10 FILES
797
+
798
+ The file URL scheme is used to designate files accessible on a
799
+ particular host computer. This scheme, unlike most other URL schemes,
800
+ does not designate a resource that is universally accessible over the
801
+ Internet.
802
+
803
+ A file URL takes the form:
804
+
805
+ file://<host>/<path>
806
+
807
+ where <host> is the fully qualified domain name of the system on
808
+ which the <path> is accessible, and <path> is a hierarchical
809
+ directory path of the form <directory>/<directory>/.../<name>.
810
+
811
+ For example, a VMS file
812
+
813
+ DISK$USER:[MY.NOTES]NOTE123456.TXT
814
+
815
+ might become
816
+
817
+ <URL:file://vms.host.edu/disk$user/my/notes/note123456.txt>
818
+
819
+ As a special case, <host> can be the string "localhost" or the empty
820
+ string; this is interpreted as `the machine from which the URL is
821
+ being interpreted'.
822
+
823
+ The file URL scheme is unusual in that it does not specify an
824
+ Internet protocol or access method for such files; as such, its
825
+ utility in network protocols between hosts is limited.
826
+
827
+ 3.11 PROSPERO
828
+
829
+ The Prospero URL scheme is used to designate resources that are
830
+ accessed via the Prospero Directory Service. The Prospero protocol is
831
+ described elsewhere [14].
832
+
833
+ A prospero URL takes the form:
834
+
835
+ prospero://<host>:<port>/<hsoname>;<field>=<value>
836
+
837
+ where <host> and <port> are as described in Section 3.1. If :<port>
838
+ is omitted, the port defaults to 1525. No username or password is
839
+
840
+
841
+
842
+ Berners-Lee, Masinter & McCahill [Page 15]
843
+
844
+ RFC 1738 Uniform Resource Locators (URL) December 1994
845
+
846
+
847
+ allowed.
848
+
849
+ The <hsoname> is the host-specific object name in the Prospero
850
+ protocol, suitably encoded. This name is opaque and interpreted by
851
+ the Prospero server. The semicolon ";" is reserved and may not
852
+ appear without quoting in the <hsoname>.
853
+
854
+ Prospero URLs are interpreted by contacting a Prospero directory
855
+ server on the specified host and port to determine appropriate access
856
+ methods for a resource, which might themselves be represented as
857
+ different URLs. External Prospero links are represented as URLs of
858
+ the underlying access method and are not represented as Prospero
859
+ URLs.
860
+
861
+ Note that a slash "/" may appear in the <hsoname> without quoting and
862
+ no significance may be assumed by the application. Though slashes
863
+ may indicate hierarchical structure on the server, such structure is
864
+ not guaranteed. Note that many <hsoname>s begin with a slash, in
865
+ which case the host or port will be followed by a double slash: the
866
+ slash from the URL syntax, followed by the initial slash from the
867
+ <hsoname>. (E.g., <URL:prospero://host.dom//pros/name> designates a
868
+ <hsoname> of "/pros/name".)
869
+
870
+ In addition, after the <hsoname>, optional fields and values
871
+ associated with a Prospero link may be specified as part of the URL.
872
+ When present, each field/value pair is separated from each other and
873
+ from the rest of the URL by a ";" (semicolon). The name of the field
874
+ and its value are separated by a "=" (equal sign). If present, these
875
+ fields serve to identify the target of the URL. For example, the
876
+ OBJECT-VERSION field can be specified to identify a specific version
877
+ of an object.
878
+
879
+ 4. REGISTRATION OF NEW SCHEMES
880
+
881
+ A new scheme may be introduced by defining a mapping onto a
882
+ conforming URL syntax, using a new prefix. URLs for experimental
883
+ schemes may be used by mutual agreement between parties. Scheme names
884
+ starting with the characters "x-" are reserved for experimental
885
+ purposes.
886
+
887
+ The Internet Assigned Numbers Authority (IANA) will maintain a
888
+ registry of URL schemes. Any submission of a new URL scheme must
889
+ include a definition of an algorithm for accessing of resources
890
+ within that scheme and the syntax for representing such a scheme.
891
+
892
+ URL schemes must have demonstrable utility and operability. One way
893
+ to provide such a demonstration is via a gateway which provides
894
+ objects in the new scheme for clients using an existing protocol. If
895
+
896
+
897
+
898
+ Berners-Lee, Masinter & McCahill [Page 16]
899
+
900
+ RFC 1738 Uniform Resource Locators (URL) December 1994
901
+
902
+
903
+ the new scheme does not locate resources that are data objects, the
904
+ properties of names in the new space must be clearly defined.
905
+
906
+ New schemes should try to follow the same syntactic conventions of
907
+ existing schemes, where appropriate. It is likewise recommended
908
+ that, where a protocol allows for retrieval by URL, the client
909
+ software have provision for being configured to use specific gateway
910
+ locators for indirect access through new naming schemes.
911
+
912
+ The following schemes have been proposed at various times, but this
913
+ document does not define their syntax or use at this time. It is
914
+ suggested that IANA reserve their scheme names for future definition:
915
+
916
+ afs Andrew File System global file names.
917
+ mid Message identifiers for electronic mail.
918
+ cid Content identifiers for MIME body parts.
919
+ nfs Network File System (NFS) file names.
920
+ tn3270 Interactive 3270 emulation sessions.
921
+ mailserver Access to data available from mail servers.
922
+ z39.50 Access to ANSI Z39.50 services.
923
+
924
+ 5. BNF for specific URL schemes
925
+
926
+ This is a BNF-like description of the Uniform Resource Locator
927
+ syntax, using the conventions of RFC822, except that "|" is used to
928
+ designate alternatives, and brackets [] are used around optional or
929
+ repeated elements. Briefly, literals are quoted with "", optional
930
+ elements are enclosed in [brackets], and elements may be preceded
931
+ with <n>* to designate n or more repetitions of the following
932
+ element; n defaults to 0.
933
+
934
+ ; The generic form of a URL is:
935
+
936
+ genericurl = scheme ":" schemepart
937
+
938
+ ; Specific predefined schemes are defined here; new schemes
939
+ ; may be registered with IANA
940
+
941
+ url = httpurl | ftpurl | newsurl |
942
+ nntpurl | telneturl | gopherurl |
943
+ waisurl | mailtourl | fileurl |
944
+ prosperourl | otherurl
945
+
946
+ ; new schemes follow the general syntax
947
+ otherurl = genericurl
948
+
949
+ ; the scheme is in lower case; interpreters should use case-ignore
950
+ scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
951
+
952
+
953
+
954
+ Berners-Lee, Masinter & McCahill [Page 17]
955
+
956
+ RFC 1738 Uniform Resource Locators (URL) December 1994
957
+
958
+
959
+ schemepart = *xchar | ip-schemepart
960
+
961
+
962
+ ; URL schemeparts for ip based protocols:
963
+
964
+ ip-schemepart = "//" login [ "/" urlpath ]
965
+
966
+ login = [ user [ ":" password ] "@" ] hostport
967
+ hostport = host [ ":" port ]
968
+ host = hostname | hostnumber
969
+ hostname = *[ domainlabel "." ] toplabel
970
+ domainlabel = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
971
+ toplabel = alpha | alpha *[ alphadigit | "-" ] alphadigit
972
+ alphadigit = alpha | digit
973
+ hostnumber = digits "." digits "." digits "." digits
974
+ port = digits
975
+ user = *[ uchar | ";" | "?" | "&" | "=" ]
976
+ password = *[ uchar | ";" | "?" | "&" | "=" ]
977
+ urlpath = *xchar ; depends on protocol see section 3.1
978
+
979
+ ; The predefined schemes:
980
+
981
+ ; FTP (see also RFC959)
982
+
983
+ ftpurl = "ftp://" login [ "/" fpath [ ";type=" ftptype ]]
984
+ fpath = fsegment *[ "/" fsegment ]
985
+ fsegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
986
+ ftptype = "A" | "I" | "D" | "a" | "i" | "d"
987
+
988
+ ; FILE
989
+
990
+ fileurl = "file://" [ host | "localhost" ] "/" fpath
991
+
992
+ ; HTTP
993
+
994
+ httpurl = "http://" hostport [ "/" hpath [ "?" search ]]
995
+ hpath = hsegment *[ "/" hsegment ]
996
+ hsegment = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
997
+ search = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
998
+
999
+ ; GOPHER (see also RFC1436)
1000
+
1001
+ gopherurl = "gopher://" hostport [ "/" [ gtype [ selector
1002
+ [ "%09" search [ "%09" gopher+_string ] ] ] ] ]
1003
+ gtype = xchar
1004
+ selector = *xchar
1005
+ gopher+_string = *xchar
1006
+
1007
+
1008
+
1009
+
1010
+ Berners-Lee, Masinter & McCahill [Page 18]
1011
+
1012
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1013
+
1014
+
1015
+ ; MAILTO (see also RFC822)
1016
+
1017
+ mailtourl = "mailto:" encoded822addr
1018
+ encoded822addr = 1*xchar ; further defined in RFC822
1019
+
1020
+ ; NEWS (see also RFC1036)
1021
+
1022
+ newsurl = "news:" grouppart
1023
+ grouppart = "*" | group | article
1024
+ group = alpha *[ alpha | digit | "-" | "." | "+" | "_" ]
1025
+ article = 1*[ uchar | ";" | "/" | "?" | ":" | "&" | "=" ] "@" host
1026
+
1027
+ ; NNTP (see also RFC977)
1028
+
1029
+ nntpurl = "nntp://" hostport "/" group [ "/" digits ]
1030
+
1031
+ ; TELNET
1032
+
1033
+ telneturl = "telnet://" login [ "/" ]
1034
+
1035
+ ; WAIS (see also RFC1625)
1036
+
1037
+ waisurl = waisdatabase | waisindex | waisdoc
1038
+ waisdatabase = "wais://" hostport "/" database
1039
+ waisindex = "wais://" hostport "/" database "?" search
1040
+ waisdoc = "wais://" hostport "/" database "/" wtype "/" wpath
1041
+ database = *uchar
1042
+ wtype = *uchar
1043
+ wpath = *uchar
1044
+
1045
+ ; PROSPERO
1046
+
1047
+ prosperourl = "prospero://" hostport "/" ppath *[ fieldspec ]
1048
+ ppath = psegment *[ "/" psegment ]
1049
+ psegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
1050
+ fieldspec = ";" fieldname "=" fieldvalue
1051
+ fieldname = *[ uchar | "?" | ":" | "@" | "&" ]
1052
+ fieldvalue = *[ uchar | "?" | ":" | "@" | "&" ]
1053
+
1054
+ ; Miscellaneous definitions
1055
+
1056
+ lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
1057
+ "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
1058
+ "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
1059
+ "y" | "z"
1060
+ hialpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
1061
+ "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
1062
+ "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
1063
+
1064
+
1065
+
1066
+ Berners-Lee, Masinter & McCahill [Page 19]
1067
+
1068
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1069
+
1070
+
1071
+ alpha = lowalpha | hialpha
1072
+ digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
1073
+ "8" | "9"
1074
+ safe = "$" | "-" | "_" | "." | "+"
1075
+ extra = "!" | "*" | "'" | "(" | ")" | ","
1076
+ national = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
1077
+ punctuation = "<" | ">" | "#" | "%" | <">
1078
+
1079
+
1080
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="
1081
+ hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
1082
+ "a" | "b" | "c" | "d" | "e" | "f"
1083
+ escape = "%" hex hex
1084
+
1085
+ unreserved = alpha | digit | safe | extra
1086
+ uchar = unreserved | escape
1087
+ xchar = unreserved | reserved | escape
1088
+ digits = 1*digit
1089
+
1090
+ 6. Security Considerations
1091
+
1092
+ The URL scheme does not in itself pose a security threat. Users
1093
+ should beware that there is no general guarantee that a URL which at
1094
+ one time points to a given object continues to do so, and does not
1095
+ even at some later time point to a different object due to the
1096
+ movement of objects on servers.
1097
+
1098
+ A URL-related security threat is that it is sometimes possible to
1099
+ construct a URL such that an attempt to perform a harmless idempotent
1100
+ operation such as the retrieval of the object will in fact cause a
1101
+ possibly damaging remote operation to occur. The unsafe URL is
1102
+ typically constructed by specifying a port number other than that
1103
+ reserved for the network protocol in question. The client
1104
+ unwittingly contacts a server which is in fact running a different
1105
+ protocol. The content of the URL contains instructions which when
1106
+ interpreted according to this other protocol cause an unexpected
1107
+ operation. An example has been the use of gopher URLs to cause a rude
1108
+ message to be sent via a SMTP server. Caution should be used when
1109
+ using any URL which specifies a port number other than the default
1110
+ for the protocol, especially when it is a number within the reserved
1111
+ space.
1112
+
1113
+ Care should be taken when URLs contain embedded encoded delimiters
1114
+ for a given protocol (for example, CR and LF characters for telnet
1115
+ protocols) that these are not unencoded before transmission. This
1116
+ would violate the protocol but could be used to simulate an extra
1117
+ operation or parameter, again causing an unexpected and possibly
1118
+ harmful remote operation to be performed.
1119
+
1120
+
1121
+
1122
+ Berners-Lee, Masinter & McCahill [Page 20]
1123
+
1124
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1125
+
1126
+
1127
+ The use of URLs containing passwords that should be secret is clearly
1128
+ unwise.
1129
+
1130
+ 7. Acknowledgements
1131
+
1132
+ This paper builds on the basic WWW design (RFC 1630) and much
1133
+ discussion of these issues by many people on the network. The
1134
+ discussion was particularly stimulated by articles by Clifford Lynch,
1135
+ Brewster Kahle [10] and Wengyik Yeong [18]. Contributions from John
1136
+ Curran, Clifford Neuman, Ed Vielmetti and later the IETF URL BOF and
1137
+ URI working group were incorporated.
1138
+
1139
+ Most recently, careful readings and comments by Dan Connolly, Ned
1140
+ Freed, Roy Fielding, Guido van Rossum, Michael Dolan, Bert Bos, John
1141
+ Kunze, Olle Jarnefors, Peter Svanberg and many others have helped
1142
+ refine this RFC.
1143
+
1144
+
1145
+
1146
+
1147
+
1148
+
1149
+
1150
+
1151
+
1152
+
1153
+
1154
+
1155
+
1156
+
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
1169
+
1170
+
1171
+
1172
+
1173
+
1174
+
1175
+
1176
+
1177
+
1178
+ Berners-Lee, Masinter & McCahill [Page 21]
1179
+
1180
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1181
+
1182
+
1183
+ APPENDIX: Recommendations for URLs in Context
1184
+
1185
+ URIs, including URLs, are intended to be transmitted through
1186
+ protocols which provide a context for their interpretation.
1187
+
1188
+ In some cases, it will be necessary to distinguish URLs from other
1189
+ possible data structures in a syntactic structure. In this case, it is
1190
+ recommended that URLs be preceded with a prefix consisting of the
1191
+ characters "URL:". For example, this prefix may be used to
1192
+ distinguish URLs from other kinds of URIs.
1193
+
1194
+ In addition, there are many occasions when URLs are included in other
1195
+ kinds of text; examples include electronic mail, USENET news
1196
+ messages, or printed on paper. In such cases, it is convenient to
1197
+ have a separate syntactic wrapper that delimits the URL and separates
1198
+ it from the rest of the text, and in particular from punctuation
1199
+ marks that might be mistaken for part of the URL. For this purpose,
1200
+ it is recommended that angle brackets ("<" and ">"), along with the
1201
+ prefix "URL:", be used to delimit the boundaries of the URL. This
1202
+ wrapper does not form part of the URL and should not be used in
1203
+ contexts in which delimiters are already specified.
1204
+
1205
+ In the case where a fragment/anchor identifier is associated with a
1206
+ URL (following a "#"), the identifier would be placed within the
1207
+ brackets as well.
1208
+
1209
+ In some cases, extra whitespace (spaces, linebreaks, tabs, etc.) may
1210
+ need to be added to break long URLs across lines. The whitespace
1211
+ should be ignored when extracting the URL.
1212
+
1213
+ No whitespace should be introduced after a hyphen ("-") character.
1214
+ Because some typesetters and printers may (erroneously) introduce a
1215
+ hyphen at the end of line when breaking a line, the interpreter of a
1216
+ URL containing a line break immediately after a hyphen should ignore
1217
+ all unencoded whitespace around the line break, and should be aware
1218
+ that the hyphen may or may not actually be part of the URL.
1219
+
1220
+ Examples:
1221
+
1222
+ Yes, Jim, I found it under <URL:ftp://info.cern.ch/pub/www/doc;
1223
+ type=d> but you can probably pick it up from <URL:ftp://ds.in
1224
+ ternic.net/rfc>. Note the warning in <URL:http://ds.internic.
1225
+ net/instructions/overview.html#WARNING>.
1226
+
1227
+
1228
+
1229
+
1230
+
1231
+
1232
+
1233
+
1234
+ Berners-Lee, Masinter & McCahill [Page 22]
1235
+
1236
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1237
+
1238
+
1239
+ References
1240
+
1241
+ [1] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D.,
1242
+ Torrey, D., and B. Alberti, "The Internet Gopher Protocol
1243
+ (a distributed document search and retrieval protocol)",
1244
+ RFC 1436, University of Minnesota, March 1993.
1245
+ <URL:ftp://ds.internic.net/rfc/rfc1436.txt;type=a>
1246
+
1247
+ [2] Anklesaria, F., Lindner, P., McCahill, M., Torrey, D.,
1248
+ Johnson, D., and B. Alberti, "Gopher+: Upward compatible
1249
+ enhancements to the Internet Gopher protocol",
1250
+ University of Minnesota, July 1993.
1251
+ <URL:ftp://boombox.micro.umn.edu/pub/gopher/gopher_protocol
1252
+ /Gopher+/Gopher+.txt>
1253
+
1254
+ [3] Berners-Lee, T., "Universal Resource Identifiers in WWW: A
1255
+ Unifying Syntax for the Expression of Names and Addresses of
1256
+ Objects on the Network as used in the World-Wide Web", RFC
1257
+ 1630, CERN, June 1994.
1258
+ <URL:ftp://ds.internic.net/rfc/rfc1630.txt>
1259
+
1260
+ [4] Berners-Lee, T., "Hypertext Transfer Protocol (HTTP)",
1261
+ CERN, November 1993.
1262
+ <URL:ftp://info.cern.ch/pub/www/doc/http-spec.txt.Z>
1263
+
1264
+ [5] Braden, R., Editor, "Requirements for Internet Hosts --
1265
+ Application and Support", STD 3, RFC 1123, IETF, October 1989.
1266
+ <URL:ftp://ds.internic.net/rfc/rfc1123.txt>
1267
+
1268
+ [6] Crocker, D., "Standard for the Format of ARPA Internet Text
1269
+ Messages", STD 11, RFC 822, UDEL, April 1982.
1270
+ <URL:ftp://ds.internic.net/rfc/rfc822.txt>
1271
+
1272
+ [7] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R.,
1273
+ Sui, J., and M. Grinbaum, "WAIS Interface Protocol Prototype
1274
+ Functional Specification", (v1.5), Thinking Machines
1275
+ Corporation, April 1990.
1276
+ <URL:ftp://quake.think.com/pub/wais/doc/protspec.txt>
1277
+
1278
+ [8] Horton, M. and R. Adams, "Standard For Interchange of USENET
1279
+ Messages", RFC 1036, AT&T Bell Laboratories, Center for Seismic
1280
+ Studies, December 1987.
1281
+ <URL:ftp://ds.internic.net/rfc/rfc1036.txt>
1282
+
1283
+ [9] Huitema, C., "Naming: Strategies and Techniques", Computer
1284
+ Networks and ISDN Systems 23 (1991) 107-110.
1285
+
1286
+
1287
+
1288
+
1289
+
1290
+ Berners-Lee, Masinter & McCahill [Page 23]
1291
+
1292
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1293
+
1294
+
1295
+ [10] Kahle, B., "Document Identifiers, or International Standard
1296
+ Book Numbers for the Electronic Age", 1991.
1297
+ <URL:ftp://quake.think.com/pub/wais/doc/doc-ids.txt>
1298
+
1299
+ [11] Kantor, B. and P. Lapsley, "Network News Transfer Protocol:
1300
+ A Proposed Standard for the Stream-Based Transmission of News",
1301
+ RFC 977, UC San Diego & UC Berkeley, February 1986.
1302
+ <URL:ftp://ds.internic.net/rfc/rfc977.txt>
1303
+
1304
+ [12] Kunze, J., "Functional Requirements for Internet Resource
1305
+ Locators", Work in Progress, December 1994.
1306
+ <URL:ftp://ds.internic.net/internet-drafts
1307
+ /draft-ietf-uri-irl-fun-req-02.txt>
1308
+
1309
+ [13] Mockapetris, P., "Domain Names - Concepts and Facilities",
1310
+ STD 13, RFC 1034, USC/Information Sciences Institute,
1311
+ November 1987.
1312
+ <URL:ftp://ds.internic.net/rfc/rfc1034.txt>
1313
+
1314
+ [14] Neuman, B., and S. Augart, "The Prospero Protocol",
1315
+ USC/Information Sciences Institute, June 1993.
1316
+ <URL:ftp://prospero.isi.edu/pub/prospero/doc
1317
+ /prospero-protocol.PS.Z>
1318
+
1319
+ [15] Postel, J. and J. Reynolds, "File Transfer Protocol (FTP)",
1320
+ STD 9, RFC 959, USC/Information Sciences Institute,
1321
+ October 1985.
1322
+ <URL:ftp://ds.internic.net/rfc/rfc959.txt>
1323
+
1324
+ [16] Sollins, K. and L. Masinter, "Functional Requirements for
1325
+ Uniform Resource Names", RFC 1737, MIT/LCS, Xerox Corporation,
1326
+ December 1994.
1327
+ <URL:ftp://ds.internic.net/rfc/rfc1737.txt>
1328
+
1329
+ [17] St. Pierre, M., Fullton, J., Gamiel, K., Goldman, J., Kahle, B.,
1330
+ Kunze, J., Morris, H., and F. Schiettecatte, "WAIS over
1331
+ Z39.50-1988", RFC 1625, WAIS, Inc., CNIDR, Thinking Machines
1332
+ Corp., UC Berkeley, FS Consulting, June 1994.
1333
+ <URL:ftp://ds.internic.net/rfc/rfc1625.txt>
1334
+
1335
+ [18] Yeong, W., "Towards Networked Information Retrieval", Technical
1336
+ report 91-06-25-01, Performance Systems International, Inc.
1337
+ <URL:ftp://uu.psi.com/wp/nir.txt>, June 1991.
1338
+
1339
+ [19] Yeong, W., "Representing Public Archives in the Directory",
1340
+ Work in Progress, November 1991.
1341
+
1342
+
1343
+
1344
+
1345
+
1346
+ Berners-Lee, Masinter & McCahill [Page 24]
1347
+
1348
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1349
+
1350
+
1351
+ [20] "Coded Character Set -- 7-bit American Standard Code for
1352
+ Information Interchange", ANSI X3.4-1986.
1353
+
1354
+ Editors' Addresses
1355
+
1356
+ Tim Berners-Lee
1357
+ World-Wide Web project
1358
+ CERN,
1359
+ 1211 Geneva 23,
1360
+ Switzerland
1361
+
1362
+ Phone: +41 (22)767 3755
1363
+ Fax: +41 (22)767 7155
1364
+ EMail: timbl@info.cern.ch
1365
+
1366
+
1367
+ Larry Masinter
1368
+ Xerox PARC
1369
+ 3333 Coyote Hill Road
1370
+ Palo Alto, CA 94034
1371
+
1372
+ Phone: (415) 812-4365
1373
+ Fax: (415) 812-4333
1374
+ EMail: masinter@parc.xerox.com
1375
+
1376
+
1377
+ Mark McCahill
1378
+ Computer and Information Services,
1379
+ University of Minnesota
1380
+ Room 152 Shepherd Labs
1381
+ 100 Union Street SE
1382
+ Minneapolis, MN 55455
1383
+
1384
+ Phone: (612) 625 1300
1385
+ EMail: mpm@boombox.micro.umn.edu
1386
+
1387
+
1388
+
1389
+
1390
+
1391
+
1392
+
1393
+
1394
+
1395
+
1396
+
1397
+
1398
+
1399
+
1400
+
1401
+
1402
+ Berners-Lee, Masinter & McCahill [Page 25]
1403
+