rio 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. data/COPYING +340 -0
  2. data/ChangeLog +755 -0
  3. data/README +65 -0
  4. data/RUNME.1st.rb +75 -0
  5. data/Rakefile +312 -0
  6. data/VERSION +1 -0
  7. data/doc/README_MSWIN32.txt +39 -0
  8. data/doc/RELEASE_NOTES +130 -0
  9. data/doc/generators/template/html/rio.rb +895 -0
  10. data/doc/rdoc/classes/Kernel.html +181 -0
  11. data/doc/rdoc/classes/Kernel.src/M000183.html +18 -0
  12. data/doc/rdoc/classes/RIO.html +508 -0
  13. data/doc/rdoc/classes/RIO.src/M000001.html +18 -0
  14. data/doc/rdoc/classes/RIO.src/M000002.html +18 -0
  15. data/doc/rdoc/classes/RIO.src/M000003.html +18 -0
  16. data/doc/rdoc/classes/RIO/Doc.html +138 -0
  17. data/doc/rdoc/classes/RIO/Doc/HOWTO.html +1031 -0
  18. data/doc/rdoc/classes/RIO/Doc/INTRO.html +1116 -0
  19. data/doc/rdoc/classes/RIO/Doc/MISC.html +443 -0
  20. data/doc/rdoc/classes/RIO/Doc/SYNOPSIS.html +325 -0
  21. data/doc/rdoc/classes/RIO/Rio.html +6333 -0
  22. data/doc/rdoc/classes/RIO/Rio.src/M000004.html +18 -0
  23. data/doc/rdoc/classes/RIO/Rio.src/M000005.html +20 -0
  24. data/doc/rdoc/classes/RIO/Rio.src/M000006.html +27 -0
  25. data/doc/rdoc/classes/RIO/Rio.src/M000007.html +27 -0
  26. data/doc/rdoc/classes/RIO/Rio.src/M000008.html +16 -0
  27. data/doc/rdoc/classes/RIO/Rio.src/M000009.html +18 -0
  28. data/doc/rdoc/classes/RIO/Rio.src/M000010.html +20 -0
  29. data/doc/rdoc/classes/RIO/Rio.src/M000011.html +16 -0
  30. data/doc/rdoc/classes/RIO/Rio.src/M000012.html +16 -0
  31. data/doc/rdoc/classes/RIO/Rio.src/M000013.html +16 -0
  32. data/doc/rdoc/classes/RIO/Rio.src/M000014.html +16 -0
  33. data/doc/rdoc/classes/RIO/Rio.src/M000015.html +16 -0
  34. data/doc/rdoc/classes/RIO/Rio.src/M000016.html +16 -0
  35. data/doc/rdoc/classes/RIO/Rio.src/M000017.html +16 -0
  36. data/doc/rdoc/classes/RIO/Rio.src/M000018.html +16 -0
  37. data/doc/rdoc/classes/RIO/Rio.src/M000019.html +16 -0
  38. data/doc/rdoc/classes/RIO/Rio.src/M000020.html +16 -0
  39. data/doc/rdoc/classes/RIO/Rio.src/M000021.html +16 -0
  40. data/doc/rdoc/classes/RIO/Rio.src/M000022.html +16 -0
  41. data/doc/rdoc/classes/RIO/Rio.src/M000023.html +16 -0
  42. data/doc/rdoc/classes/RIO/Rio.src/M000024.html +16 -0
  43. data/doc/rdoc/classes/RIO/Rio.src/M000025.html +16 -0
  44. data/doc/rdoc/classes/RIO/Rio.src/M000026.html +16 -0
  45. data/doc/rdoc/classes/RIO/Rio.src/M000027.html +16 -0
  46. data/doc/rdoc/classes/RIO/Rio.src/M000028.html +16 -0
  47. data/doc/rdoc/classes/RIO/Rio.src/M000029.html +16 -0
  48. data/doc/rdoc/classes/RIO/Rio.src/M000030.html +16 -0
  49. data/doc/rdoc/classes/RIO/Rio.src/M000031.html +16 -0
  50. data/doc/rdoc/classes/RIO/Rio.src/M000032.html +16 -0
  51. data/doc/rdoc/classes/RIO/Rio.src/M000033.html +16 -0
  52. data/doc/rdoc/classes/RIO/Rio.src/M000034.html +16 -0
  53. data/doc/rdoc/classes/RIO/Rio.src/M000035.html +16 -0
  54. data/doc/rdoc/classes/RIO/Rio.src/M000036.html +16 -0
  55. data/doc/rdoc/classes/RIO/Rio.src/M000037.html +16 -0
  56. data/doc/rdoc/classes/RIO/Rio.src/M000038.html +16 -0
  57. data/doc/rdoc/classes/RIO/Rio.src/M000039.html +16 -0
  58. data/doc/rdoc/classes/RIO/Rio.src/M000040.html +16 -0
  59. data/doc/rdoc/classes/RIO/Rio.src/M000041.html +16 -0
  60. data/doc/rdoc/classes/RIO/Rio.src/M000042.html +16 -0
  61. data/doc/rdoc/classes/RIO/Rio.src/M000043.html +16 -0
  62. data/doc/rdoc/classes/RIO/Rio.src/M000044.html +16 -0
  63. data/doc/rdoc/classes/RIO/Rio.src/M000045.html +16 -0
  64. data/doc/rdoc/classes/RIO/Rio.src/M000046.html +16 -0
  65. data/doc/rdoc/classes/RIO/Rio.src/M000047.html +16 -0
  66. data/doc/rdoc/classes/RIO/Rio.src/M000048.html +16 -0
  67. data/doc/rdoc/classes/RIO/Rio.src/M000049.html +16 -0
  68. data/doc/rdoc/classes/RIO/Rio.src/M000050.html +16 -0
  69. data/doc/rdoc/classes/RIO/Rio.src/M000051.html +16 -0
  70. data/doc/rdoc/classes/RIO/Rio.src/M000052.html +16 -0
  71. data/doc/rdoc/classes/RIO/Rio.src/M000053.html +16 -0
  72. data/doc/rdoc/classes/RIO/Rio.src/M000054.html +16 -0
  73. data/doc/rdoc/classes/RIO/Rio.src/M000055.html +16 -0
  74. data/doc/rdoc/classes/RIO/Rio.src/M000056.html +16 -0
  75. data/doc/rdoc/classes/RIO/Rio.src/M000057.html +16 -0
  76. data/doc/rdoc/classes/RIO/Rio.src/M000058.html +16 -0
  77. data/doc/rdoc/classes/RIO/Rio.src/M000059.html +16 -0
  78. data/doc/rdoc/classes/RIO/Rio.src/M000060.html +16 -0
  79. data/doc/rdoc/classes/RIO/Rio.src/M000061.html +16 -0
  80. data/doc/rdoc/classes/RIO/Rio.src/M000062.html +16 -0
  81. data/doc/rdoc/classes/RIO/Rio.src/M000063.html +16 -0
  82. data/doc/rdoc/classes/RIO/Rio.src/M000064.html +16 -0
  83. data/doc/rdoc/classes/RIO/Rio.src/M000065.html +16 -0
  84. data/doc/rdoc/classes/RIO/Rio.src/M000066.html +16 -0
  85. data/doc/rdoc/classes/RIO/Rio.src/M000067.html +16 -0
  86. data/doc/rdoc/classes/RIO/Rio.src/M000068.html +16 -0
  87. data/doc/rdoc/classes/RIO/Rio.src/M000069.html +16 -0
  88. data/doc/rdoc/classes/RIO/Rio.src/M000070.html +16 -0
  89. data/doc/rdoc/classes/RIO/Rio.src/M000071.html +16 -0
  90. data/doc/rdoc/classes/RIO/Rio.src/M000072.html +16 -0
  91. data/doc/rdoc/classes/RIO/Rio.src/M000073.html +16 -0
  92. data/doc/rdoc/classes/RIO/Rio.src/M000074.html +16 -0
  93. data/doc/rdoc/classes/RIO/Rio.src/M000075.html +16 -0
  94. data/doc/rdoc/classes/RIO/Rio.src/M000076.html +16 -0
  95. data/doc/rdoc/classes/RIO/Rio.src/M000077.html +16 -0
  96. data/doc/rdoc/classes/RIO/Rio.src/M000078.html +16 -0
  97. data/doc/rdoc/classes/RIO/Rio.src/M000079.html +16 -0
  98. data/doc/rdoc/classes/RIO/Rio.src/M000080.html +16 -0
  99. data/doc/rdoc/classes/RIO/Rio.src/M000081.html +16 -0
  100. data/doc/rdoc/classes/RIO/Rio.src/M000082.html +16 -0
  101. data/doc/rdoc/classes/RIO/Rio.src/M000083.html +16 -0
  102. data/doc/rdoc/classes/RIO/Rio.src/M000084.html +16 -0
  103. data/doc/rdoc/classes/RIO/Rio.src/M000085.html +16 -0
  104. data/doc/rdoc/classes/RIO/Rio.src/M000086.html +16 -0
  105. data/doc/rdoc/classes/RIO/Rio.src/M000087.html +16 -0
  106. data/doc/rdoc/classes/RIO/Rio.src/M000088.html +16 -0
  107. data/doc/rdoc/classes/RIO/Rio.src/M000089.html +16 -0
  108. data/doc/rdoc/classes/RIO/Rio.src/M000090.html +16 -0
  109. data/doc/rdoc/classes/RIO/Rio.src/M000091.html +16 -0
  110. data/doc/rdoc/classes/RIO/Rio.src/M000092.html +16 -0
  111. data/doc/rdoc/classes/RIO/Rio.src/M000093.html +16 -0
  112. data/doc/rdoc/classes/RIO/Rio.src/M000094.html +16 -0
  113. data/doc/rdoc/classes/RIO/Rio.src/M000095.html +16 -0
  114. data/doc/rdoc/classes/RIO/Rio.src/M000096.html +16 -0
  115. data/doc/rdoc/classes/RIO/Rio.src/M000097.html +16 -0
  116. data/doc/rdoc/classes/RIO/Rio.src/M000098.html +16 -0
  117. data/doc/rdoc/classes/RIO/Rio.src/M000099.html +16 -0
  118. data/doc/rdoc/classes/RIO/Rio.src/M000100.html +16 -0
  119. data/doc/rdoc/classes/RIO/Rio.src/M000101.html +16 -0
  120. data/doc/rdoc/classes/RIO/Rio.src/M000102.html +16 -0
  121. data/doc/rdoc/classes/RIO/Rio.src/M000103.html +16 -0
  122. data/doc/rdoc/classes/RIO/Rio.src/M000104.html +16 -0
  123. data/doc/rdoc/classes/RIO/Rio.src/M000105.html +16 -0
  124. data/doc/rdoc/classes/RIO/Rio.src/M000106.html +16 -0
  125. data/doc/rdoc/classes/RIO/Rio.src/M000107.html +16 -0
  126. data/doc/rdoc/classes/RIO/Rio.src/M000108.html +16 -0
  127. data/doc/rdoc/classes/RIO/Rio.src/M000109.html +16 -0
  128. data/doc/rdoc/classes/RIO/Rio.src/M000110.html +16 -0
  129. data/doc/rdoc/classes/RIO/Rio.src/M000111.html +16 -0
  130. data/doc/rdoc/classes/RIO/Rio.src/M000112.html +16 -0
  131. data/doc/rdoc/classes/RIO/Rio.src/M000113.html +16 -0
  132. data/doc/rdoc/classes/RIO/Rio.src/M000114.html +16 -0
  133. data/doc/rdoc/classes/RIO/Rio.src/M000115.html +16 -0
  134. data/doc/rdoc/classes/RIO/Rio.src/M000116.html +16 -0
  135. data/doc/rdoc/classes/RIO/Rio.src/M000117.html +16 -0
  136. data/doc/rdoc/classes/RIO/Rio.src/M000118.html +16 -0
  137. data/doc/rdoc/classes/RIO/Rio.src/M000119.html +16 -0
  138. data/doc/rdoc/classes/RIO/Rio.src/M000120.html +16 -0
  139. data/doc/rdoc/classes/RIO/Rio.src/M000121.html +16 -0
  140. data/doc/rdoc/classes/RIO/Rio.src/M000122.html +16 -0
  141. data/doc/rdoc/classes/RIO/Rio.src/M000123.html +16 -0
  142. data/doc/rdoc/classes/RIO/Rio.src/M000124.html +16 -0
  143. data/doc/rdoc/classes/RIO/Rio.src/M000125.html +16 -0
  144. data/doc/rdoc/classes/RIO/Rio.src/M000126.html +16 -0
  145. data/doc/rdoc/classes/RIO/Rio.src/M000127.html +16 -0
  146. data/doc/rdoc/classes/RIO/Rio.src/M000128.html +16 -0
  147. data/doc/rdoc/classes/RIO/Rio.src/M000129.html +16 -0
  148. data/doc/rdoc/classes/RIO/Rio.src/M000130.html +16 -0
  149. data/doc/rdoc/classes/RIO/Rio.src/M000131.html +16 -0
  150. data/doc/rdoc/classes/RIO/Rio.src/M000132.html +16 -0
  151. data/doc/rdoc/classes/RIO/Rio.src/M000133.html +16 -0
  152. data/doc/rdoc/classes/RIO/Rio.src/M000134.html +16 -0
  153. data/doc/rdoc/classes/RIO/Rio.src/M000135.html +16 -0
  154. data/doc/rdoc/classes/RIO/Rio.src/M000136.html +16 -0
  155. data/doc/rdoc/classes/RIO/Rio.src/M000137.html +16 -0
  156. data/doc/rdoc/classes/RIO/Rio.src/M000138.html +16 -0
  157. data/doc/rdoc/classes/RIO/Rio.src/M000139.html +16 -0
  158. data/doc/rdoc/classes/RIO/Rio.src/M000140.html +16 -0
  159. data/doc/rdoc/classes/RIO/Rio.src/M000141.html +16 -0
  160. data/doc/rdoc/classes/RIO/Rio.src/M000142.html +16 -0
  161. data/doc/rdoc/classes/RIO/Rio.src/M000143.html +16 -0
  162. data/doc/rdoc/classes/RIO/Rio.src/M000144.html +16 -0
  163. data/doc/rdoc/classes/RIO/Rio.src/M000145.html +16 -0
  164. data/doc/rdoc/classes/RIO/Rio.src/M000146.html +16 -0
  165. data/doc/rdoc/classes/RIO/Rio.src/M000147.html +16 -0
  166. data/doc/rdoc/classes/RIO/Rio.src/M000148.html +16 -0
  167. data/doc/rdoc/classes/RIO/Rio.src/M000149.html +16 -0
  168. data/doc/rdoc/classes/RIO/Rio.src/M000150.html +16 -0
  169. data/doc/rdoc/classes/RIO/Rio.src/M000151.html +16 -0
  170. data/doc/rdoc/classes/RIO/Rio.src/M000152.html +16 -0
  171. data/doc/rdoc/classes/RIO/Rio.src/M000153.html +16 -0
  172. data/doc/rdoc/classes/RIO/Rio.src/M000154.html +16 -0
  173. data/doc/rdoc/classes/RIO/Rio.src/M000155.html +16 -0
  174. data/doc/rdoc/classes/RIO/Rio.src/M000156.html +16 -0
  175. data/doc/rdoc/classes/RIO/Rio.src/M000157.html +16 -0
  176. data/doc/rdoc/classes/RIO/Rio.src/M000158.html +16 -0
  177. data/doc/rdoc/classes/RIO/Rio.src/M000159.html +16 -0
  178. data/doc/rdoc/classes/RIO/Rio.src/M000160.html +16 -0
  179. data/doc/rdoc/classes/RIO/Rio.src/M000161.html +16 -0
  180. data/doc/rdoc/classes/RIO/Rio.src/M000162.html +16 -0
  181. data/doc/rdoc/classes/RIO/Rio.src/M000163.html +16 -0
  182. data/doc/rdoc/classes/RIO/Rio.src/M000164.html +16 -0
  183. data/doc/rdoc/classes/RIO/Rio.src/M000165.html +16 -0
  184. data/doc/rdoc/classes/RIO/Rio.src/M000166.html +16 -0
  185. data/doc/rdoc/classes/RIO/Rio.src/M000167.html +16 -0
  186. data/doc/rdoc/classes/RIO/Rio.src/M000168.html +16 -0
  187. data/doc/rdoc/classes/RIO/Rio.src/M000169.html +16 -0
  188. data/doc/rdoc/classes/RIO/Rio.src/M000170.html +16 -0
  189. data/doc/rdoc/classes/RIO/Rio.src/M000171.html +16 -0
  190. data/doc/rdoc/classes/RIO/Rio.src/M000172.html +16 -0
  191. data/doc/rdoc/classes/RIO/Rio.src/M000173.html +16 -0
  192. data/doc/rdoc/classes/RIO/Rio.src/M000174.html +16 -0
  193. data/doc/rdoc/classes/RIO/Rio.src/M000175.html +16 -0
  194. data/doc/rdoc/classes/RIO/Rio.src/M000176.html +16 -0
  195. data/doc/rdoc/classes/RIO/Rio.src/M000177.html +16 -0
  196. data/doc/rdoc/classes/RIO/Rio.src/M000178.html +16 -0
  197. data/doc/rdoc/classes/RIO/Rio.src/M000179.html +16 -0
  198. data/doc/rdoc/classes/RIO/Rio.src/M000180.html +16 -0
  199. data/doc/rdoc/classes/RIO/Rio.src/M000181.html +16 -0
  200. data/doc/rdoc/classes/RIO/Rio.src/M000182.html +16 -0
  201. data/doc/rdoc/created.rid +1 -0
  202. data/doc/rdoc/files/README.html +215 -0
  203. data/doc/rdoc/files/lib/rio/constructor_rb.html +142 -0
  204. data/doc/rdoc/files/lib/rio/doc/HOWTO_rb.html +135 -0
  205. data/doc/rdoc/files/lib/rio/doc/INTRO_rb.html +135 -0
  206. data/doc/rdoc/files/lib/rio/doc/MISC_rb.html +135 -0
  207. data/doc/rdoc/files/lib/rio/doc/SYNOPSIS_rb.html +135 -0
  208. data/doc/rdoc/files/lib/rio/if/basic_rb.html +135 -0
  209. data/doc/rdoc/files/lib/rio/if/dir_rb.html +135 -0
  210. data/doc/rdoc/files/lib/rio/if/file_rb.html +135 -0
  211. data/doc/rdoc/files/lib/rio/if/fileordir_rb.html +135 -0
  212. data/doc/rdoc/files/lib/rio/if/grande_rb.html +135 -0
  213. data/doc/rdoc/files/lib/rio/if/internal_rb.html +135 -0
  214. data/doc/rdoc/files/lib/rio/if/methods_rb.html +135 -0
  215. data/doc/rdoc/files/lib/rio/if/path_rb.html +135 -0
  216. data/doc/rdoc/files/lib/rio/if/stream_rb.html +135 -0
  217. data/doc/rdoc/files/lib/rio/if/test_rb.html +135 -0
  218. data/doc/rdoc/files/lib/rio/kernel_rb.html +142 -0
  219. data/doc/rdoc/files/lib/rio_rb.html +153 -0
  220. data/doc/rdoc/fr_class_index.html +34 -0
  221. data/doc/rdoc/fr_file_index.html +44 -0
  222. data/doc/rdoc/fr_method_index.html +210 -0
  223. data/doc/rdoc/index.html +24 -0
  224. data/doc/rdoc/rdoc-style.css +384 -0
  225. data/doc/rfc1738.txt +1403 -0
  226. data/doc/rfc959.txt +3933 -0
  227. data/ex/colx.rb +6 -0
  228. data/ex/findinruby +19 -0
  229. data/ex/findruby +11 -0
  230. data/ex/prompt.rb +25 -0
  231. data/ex/rgb.txt.gz +0 -0
  232. data/ex/riocat +35 -0
  233. data/ex/riogunzip +31 -0
  234. data/ex/riogzip +24 -0
  235. data/ex/tolf +11 -0
  236. data/lib/rio.rb +163 -0
  237. data/lib/rio/abstract_method.rb +57 -0
  238. data/lib/rio/argv.rb +57 -0
  239. data/lib/rio/arrayio.rb +199 -0
  240. data/lib/rio/arycopy.rb +44 -0
  241. data/lib/rio/assert.rb +115 -0
  242. data/lib/rio/base.rb +59 -0
  243. data/lib/rio/constructor.rb +183 -0
  244. data/lib/rio/context.rb +117 -0
  245. data/lib/rio/context/chomp.rb +53 -0
  246. data/lib/rio/context/closeoneof.rb +50 -0
  247. data/lib/rio/context/cxx.rb +67 -0
  248. data/lib/rio/context/dir.rb +92 -0
  249. data/lib/rio/context/gzip.rb +51 -0
  250. data/lib/rio/context/methods.rb +196 -0
  251. data/lib/rio/context/stream.rb +170 -0
  252. data/lib/rio/cp.rb +305 -0
  253. data/lib/rio/cxdir.rb +79 -0
  254. data/lib/rio/dir.rb +145 -0
  255. data/lib/rio/doc.rb +45 -0
  256. data/lib/rio/doc/HOWTO.rb +691 -0
  257. data/lib/rio/doc/INTRO.rb +579 -0
  258. data/lib/rio/doc/MISC.rb +257 -0
  259. data/lib/rio/doc/SYNOPSIS.rb +170 -0
  260. data/lib/rio/entrysel.rb +162 -0
  261. data/lib/rio/exception.rb +42 -0
  262. data/lib/rio/exception/copy.rb +98 -0
  263. data/lib/rio/exception/open.rb +62 -0
  264. data/lib/rio/exception/state.rb +74 -0
  265. data/lib/rio/ext.rb +62 -0
  266. data/lib/rio/ext/csv.rb +261 -0
  267. data/lib/rio/factory.rb +236 -0
  268. data/lib/rio/file.rb +77 -0
  269. data/lib/rio/filter/chomp.rb +61 -0
  270. data/lib/rio/filter/closeoneof.rb +103 -0
  271. data/lib/rio/filter/gzip.rb +58 -0
  272. data/lib/rio/ftp.rb +275 -0
  273. data/lib/rio/ftp/conn.rb +167 -0
  274. data/lib/rio/ftp/ioh.rb +88 -0
  275. data/lib/rio/grande.rb +126 -0
  276. data/lib/rio/handle.rb +101 -0
  277. data/lib/rio/if.rb +53 -0
  278. data/lib/rio/if/basic.rb +64 -0
  279. data/lib/rio/if/dir.rb +362 -0
  280. data/lib/rio/if/file.rb +57 -0
  281. data/lib/rio/if/fileordir.rb +247 -0
  282. data/lib/rio/if/grande.rb +510 -0
  283. data/lib/rio/if/internal.rb +53 -0
  284. data/lib/rio/if/methods.rb +612 -0
  285. data/lib/rio/if/path.rb +413 -0
  286. data/lib/rio/if/stream.rb +599 -0
  287. data/lib/rio/if/test.rb +219 -0
  288. data/lib/rio/impl/path.rb +82 -0
  289. data/lib/rio/ioh.rb +137 -0
  290. data/lib/rio/iomode.rb +96 -0
  291. data/lib/rio/kernel.rb +47 -0
  292. data/lib/rio/local.rb +63 -0
  293. data/lib/rio/match.rb +51 -0
  294. data/lib/rio/matchrecord.rb +254 -0
  295. data/lib/rio/open3.rb +69 -0
  296. data/lib/rio/ops/create.rb +78 -0
  297. data/lib/rio/ops/dir.rb +302 -0
  298. data/lib/rio/ops/either.rb +117 -0
  299. data/lib/rio/ops/file.rb +94 -0
  300. data/lib/rio/ops/path.rb +292 -0
  301. data/lib/rio/ops/stream.rb +84 -0
  302. data/lib/rio/ops/stream/input.rb +237 -0
  303. data/lib/rio/ops/stream/output.rb +96 -0
  304. data/lib/rio/ops/stream/read.rb +84 -0
  305. data/lib/rio/ops/stream/write.rb +58 -0
  306. data/lib/rio/ops/symlink.rb +70 -0
  307. data/lib/rio/path.rb +117 -0
  308. data/lib/rio/path/reset.rb +70 -0
  309. data/lib/rio/record.rb +59 -0
  310. data/lib/rio/rectype.rb +86 -0
  311. data/lib/rio/rl/base.rb +147 -0
  312. data/lib/rio/rl/builder.rb +166 -0
  313. data/lib/rio/rl/ioi.rb +66 -0
  314. data/lib/rio/rl/path.rb +141 -0
  315. data/lib/rio/rl/uri.rb +118 -0
  316. data/lib/rio/scheme/aryio.rb +89 -0
  317. data/lib/rio/scheme/cmdio.rb +74 -0
  318. data/lib/rio/scheme/fd.rb +65 -0
  319. data/lib/rio/scheme/ftp.rb +73 -0
  320. data/lib/rio/scheme/http.rb +81 -0
  321. data/lib/rio/scheme/path.rb +100 -0
  322. data/lib/rio/scheme/stderr.rb +56 -0
  323. data/lib/rio/scheme/stdio.rb +71 -0
  324. data/lib/rio/scheme/strio.rb +82 -0
  325. data/lib/rio/scheme/sysio.rb +61 -0
  326. data/lib/rio/scheme/tcp.rb +74 -0
  327. data/lib/rio/scheme/tempfile.rb +104 -0
  328. data/lib/rio/state.rb +209 -0
  329. data/lib/rio/state/error.rb +73 -0
  330. data/lib/rio/stream.rb +181 -0
  331. data/lib/rio/stream/base.rb +50 -0
  332. data/lib/rio/stream/duplex.rb +76 -0
  333. data/lib/rio/stream/open.rb +203 -0
  334. data/lib/rio/symantics.rb +46 -0
  335. data/lib/rio/to_rio.rb +57 -0
  336. data/lib/rio/uri/file.rb +145 -0
  337. data/lib/rio/version.rb +52 -0
  338. data/setup.rb +1331 -0
  339. data/test/1.rb +14 -0
  340. data/test/mswin32.rb +28 -0
  341. data/test/once.rb +7 -0
  342. data/test/runtests.rb +12 -0
  343. data/test/runtests_gem.rb +15 -0
  344. data/test/tc/abs.rb +349 -0
  345. data/test/tc/all.rb +42 -0
  346. data/test/tc/cd1.rb +116 -0
  347. data/test/tc/clearsel.rb +69 -0
  348. data/test/tc/closeoncopy.rb +91 -0
  349. data/test/tc/closeoneof.rb +194 -0
  350. data/test/tc/copy-from.rb +183 -0
  351. data/test/tc/copy-to.rb +94 -0
  352. data/test/tc/copy.rb +72 -0
  353. data/test/tc/copyarray.rb +191 -0
  354. data/test/tc/copydest.rb +50 -0
  355. data/test/tc/copydir.rb +192 -0
  356. data/test/tc/copydirlines.rb +124 -0
  357. data/test/tc/copylines.rb +40 -0
  358. data/test/tc/copynonex.rb +121 -0
  359. data/test/tc/create.rb +104 -0
  360. data/test/tc/csv.rb +229 -0
  361. data/test/tc/dir.rb +79 -0
  362. data/test/tc/dirautoclose.rb +70 -0
  363. data/test/tc/dirent.rb +180 -0
  364. data/test/tc/dirss.rb +84 -0
  365. data/test/tc/each.rb +111 -0
  366. data/test/tc/each_break.rb +241 -0
  367. data/test/tc/edf.rb +82 -0
  368. data/test/tc/entary.rb +230 -0
  369. data/test/tc/eq.rb +101 -0
  370. data/test/tc/expand_path.rb +94 -0
  371. data/test/tc/ext.rb +115 -0
  372. data/test/tc/fileno.rb +95 -0
  373. data/test/tc/getrec.rb +140 -0
  374. data/test/tc/lineno.rb +197 -0
  375. data/test/tc/lines.rb +66 -0
  376. data/test/tc/methods.rb +185 -0
  377. data/test/tc/misc.rb +473 -0
  378. data/test/tc/nolines.rb +205 -0
  379. data/test/tc/noqae.rb +873 -0
  380. data/test/tc/once.rb +6 -0
  381. data/test/tc/overload.rb +137 -0
  382. data/test/tc/pa.rb +159 -0
  383. data/test/tc/pathop.rb +63 -0
  384. data/test/tc/paths.rb +147 -0
  385. data/test/tc/qae.rb +494 -0
  386. data/test/tc/qae_riovar.rb +500 -0
  387. data/test/tc/records.rb +69 -0
  388. data/test/tc/rename.rb +224 -0
  389. data/test/tc/rename_assign.rb +48 -0
  390. data/test/tc/sub.rb +49 -0
  391. data/test/tc/symlink.rb +177 -0
  392. data/test/tc/symlink0.rb +298 -0
  393. data/test/tc/symlink1.rb +115 -0
  394. data/test/tc/testcase.rb +152 -0
  395. metadata +461 -0
@@ -0,0 +1,1403 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+ Network Working Group T. Berners-Lee
8
+ Request for Comments: 1738 CERN
9
+ Category: Standards Track L. Masinter
10
+ Xerox Corporation
11
+ M. McCahill
12
+ University of Minnesota
13
+ Editors
14
+ December 1994
15
+
16
+
17
+ Uniform Resource Locators (URL)
18
+
19
+ Status of this Memo
20
+
21
+ This document specifies an Internet standards track protocol for the
22
+ Internet community, and requests discussion and suggestions for
23
+ improvements. Please refer to the current edition of the "Internet
24
+ Official Protocol Standards" (STD 1) for the standardization state
25
+ and status of this protocol. Distribution of this memo is unlimited.
26
+
27
+ Abstract
28
+
29
+ This document specifies a Uniform Resource Locator (URL), the syntax
30
+ and semantics of formalized information for location and access of
31
+ resources via the Internet.
32
+
33
+ 1. Introduction
34
+
35
+ This document describes the syntax and semantics for a compact string
36
+ representation for a resource available via the Internet. These
37
+ strings are called "Uniform Resource Locators" (URLs).
38
+
39
+ The specification is derived from concepts introduced by the World-
40
+ Wide Web global information initiative, whose use of such objects
41
+ dates from 1990 and is described in "Universal Resource Identifiers
42
+ in WWW", RFC 1630. The specification of URLs is designed to meet the
43
+ requirements laid out in "Functional Requirements for Internet
44
+ Resource Locators" [12].
45
+
46
+ This document was written by the URI working group of the Internet
47
+ Engineering Task Force. Comments may be addressed to the editors, or
48
+ to the URI-WG <uri@bunyip.com>. Discussions of the group are archived
49
+ at <URL:http://www.acl.lanl.gov/URI/archive/uri-archive.index.html>
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+ Berners-Lee, Masinter & McCahill [Page 1]
59
+
60
+ RFC 1738 Uniform Resource Locators (URL) December 1994
61
+
62
+
63
+ 2. General URL Syntax
64
+
65
+ Just as there are many different methods of access to resources,
66
+ there are several schemes for describing the location of such
67
+ resources.
68
+
69
+ The generic syntax for URLs provides a framework for new schemes to
70
+ be established using protocols other than those defined in this
71
+ document.
72
+
73
+ URLs are used to `locate' resources, by providing an abstract
74
+ identification of the resource location. Having located a resource,
75
+ a system may perform a variety of operations on the resource, as
76
+ might be characterized by such words as `access', `update',
77
+ `replace', `find attributes'. In general, only the `access' method
78
+ needs to be specified for any URL scheme.
79
+
80
+ 2.1. The main parts of URLs
81
+
82
+ A full BNF description of the URL syntax is given in Section 5.
83
+
84
+ In general, URLs are written as follows:
85
+
86
+ <scheme>:<scheme-specific-part>
87
+
88
+ A URL contains the name of the scheme being used (<scheme>) followed
89
+ by a colon and then a string (the <scheme-specific-part>) whose
90
+ interpretation depends on the scheme.
91
+
92
+ Scheme names consist of a sequence of characters. The lower case
93
+ letters "a"--"z", digits, and the characters plus ("+"), period
94
+ ("."), and hyphen ("-") are allowed. For resiliency, programs
95
+ interpreting URLs should treat upper case letters as equivalent to
96
+ lower case in scheme names (e.g., allow "HTTP" as well as "http").
97
+
98
+ 2.2. URL Character Encoding Issues
99
+
100
+ URLs are sequences of characters, i.e., letters, digits, and special
101
+ characters. A URL may be represented in a variety of ways: e.g., ink
102
+ on paper, or a sequence of octets in a coded character set. The
103
+ interpretation of a URL depends only on the identity of the
104
+ characters used.
105
+
106
+ In most URL schemes, the sequences of characters in different parts
107
+ of a URL are used to represent sequences of octets used in Internet
108
+ protocols. For example, in the ftp scheme, the host name, directory
109
+ name and file names are such sequences of octets, represented by
110
+ parts of the URL. Within those parts, an octet may be represented by
111
+
112
+
113
+
114
+ Berners-Lee, Masinter & McCahill [Page 2]
115
+
116
+ RFC 1738 Uniform Resource Locators (URL) December 1994
117
+
118
+
119
+ the character which has that octet as its code within the US-ASCII
120
+ [20] coded character set.
121
+
122
+ In addition, octets may be encoded by a character triplet consisting
123
+ of the character "%" followed by the two hexadecimal digits (from
124
+ "0123456789ABCDEF") which form the hexadecimal value of the octet.
125
+ (The characters "abcdef" may also be used in hexadecimal encodings.)
126
+
127
+ Octets must be encoded if they have no corresponding graphic
128
+ character within the US-ASCII coded character set, if the use of the
129
+ corresponding character is unsafe, or if the corresponding character
130
+ is reserved for some other interpretation within the particular URL
131
+ scheme.
132
+
133
+ No corresponding graphic US-ASCII:
134
+
135
+ URLs are written only with the graphic printable characters of the
136
+ US-ASCII coded character set. The octets 80-FF hexadecimal are not
137
+ used in US-ASCII, and the octets 00-1F and 7F hexadecimal represent
138
+ control characters; these must be encoded.
139
+
140
+ Unsafe:
141
+
142
+ Characters can be unsafe for a number of reasons. The space
143
+ character is unsafe because significant spaces may disappear and
144
+ insignificant spaces may be introduced when URLs are transcribed or
145
+ typeset or subjected to the treatment of word-processing programs.
146
+ The characters "<" and ">" are unsafe because they are used as the
147
+ delimiters around URLs in free text; the quote mark (""") is used to
148
+ delimit URLs in some systems. The character "#" is unsafe and should
149
+ always be encoded because it is used in World Wide Web and in other
150
+ systems to delimit a URL from a fragment/anchor identifier that might
151
+ follow it. The character "%" is unsafe because it is used for
152
+ encodings of other characters. Other characters are unsafe because
153
+ gateways and other transport agents are known to sometimes modify
154
+ such characters. These characters are "{", "}", "|", "\", "^", "~",
155
+ "[", "]", and "`".
156
+
157
+ All unsafe characters must always be encoded within a URL. For
158
+ example, the character "#" must be encoded within URLs even in
159
+ systems that do not normally deal with fragment or anchor
160
+ identifiers, so that if the URL is copied into another system that
161
+ does use them, it will not be necessary to change the URL encoding.
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+ Berners-Lee, Masinter & McCahill [Page 3]
171
+
172
+ RFC 1738 Uniform Resource Locators (URL) December 1994
173
+
174
+
175
+ Reserved:
176
+
177
+ Many URL schemes reserve certain characters for a special meaning:
178
+ their appearance in the scheme-specific part of the URL has a
179
+ designated semantics. If the character corresponding to an octet is
180
+ reserved in a scheme, the octet must be encoded. The characters ";",
181
+ "/", "?", ":", "@", "=" and "&" are the characters which may be
182
+ reserved for special meaning within a scheme. No other characters may
183
+ be reserved within a scheme.
184
+
185
+ Usually a URL has the same interpretation when an octet is
186
+ represented by a character and when it is encoded. However, this is not
187
+ true for reserved characters: encoding a character reserved for a
188
+ particular scheme may change the semantics of a URL.
189
+
190
+ Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
191
+ reserved characters used for their reserved purposes may be used
192
+ unencoded within a URL.
193
+
194
+ On the other hand, characters that are not required to be encoded
195
+ (including alphanumerics) may be encoded within the scheme-specific
196
+ part of a URL, as long as they are not being used for a reserved
197
+ purpose.
198
+
199
+ 2.3 Hierarchical schemes and relative links
200
+
201
+ In some cases, URLs are used to locate resources that contain
202
+ pointers to other resources. In some cases, those pointers are
203
+ represented as relative links where the expression of the location of
204
+ the second resource is in terms of "in the same place as this one
205
+ except with the following relative path". Relative links are not
206
+ described in this document. However, the use of relative links
207
+ depends on the original URL containing a hierarchical structure
208
+ against which the relative link is based.
209
+
210
+ Some URL schemes (such as the ftp, http, and file schemes) contain
211
+ names that can be considered hierarchical; the components of the
212
+ hierarchy are separated by "/".
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ Berners-Lee, Masinter & McCahill [Page 4]
227
+
228
+ RFC 1738 Uniform Resource Locators (URL) December 1994
229
+
230
+
231
+ 3. Specific Schemes
232
+
233
+ The mapping for some existing standard and experimental protocols is
234
+ outlined in the BNF syntax definition. Notes on particular protocols
235
+ follow. The schemes covered are:
236
+
237
+ ftp File Transfer protocol
238
+ http Hypertext Transfer Protocol
239
+ gopher The Gopher protocol
240
+ mailto Electronic mail address
241
+ news USENET news
242
+ nntp USENET news using NNTP access
243
+ telnet Reference to interactive sessions
244
+ wais Wide Area Information Servers
245
+ file Host-specific file names
246
+ prospero Prospero Directory Service
247
+
248
+ Other schemes may be specified by future specifications. Section 4 of
249
+ this document describes how new schemes may be registered, and lists
250
+ some scheme names that are under development.
251
+
252
+ 3.1. Common Internet Scheme Syntax
253
+
254
+ While the syntax for the rest of the URL may vary depending on the
255
+ particular scheme selected, URL schemes that involve the direct use
256
+ of an IP-based protocol to a specified host on the Internet use a
257
+ common syntax for the scheme-specific data:
258
+
259
+ //<user>:<password>@<host>:<port>/<url-path>
260
+
261
+ Some or all of the parts "<user>:<password>@", ":<password>",
262
+ ":<port>", and "/<url-path>" may be excluded. The scheme specific
263
+ data start with a double slash "//" to indicate that it complies with
264
+ the common Internet scheme syntax. The different components obey the
265
+ following rules:
266
+
267
+ user
268
+ An optional user name. Some schemes (e.g., ftp) allow the
269
+ specification of a user name.
270
+
271
+ password
272
+ An optional password. If present, it follows the user
273
+ name separated from it by a colon.
274
+
275
+ The user name (and password), if present, are followed by a
276
+ commercial at-sign "@". Within the user and password field, any ":",
277
+ "@", or "/" must be encoded.
278
+
279
+
280
+
281
+
282
+ Berners-Lee, Masinter & McCahill [Page 5]
283
+
284
+ RFC 1738 Uniform Resource Locators (URL) December 1994
285
+
286
+
287
+ Note that an empty user name or password is different than no user
288
+ name or password; there is no way to specify a password without
289
+ specifying a user name. E.g., <URL:ftp://@host.com/> has an empty
290
+ user name and no password, <URL:ftp://host.com/> has no user name,
291
+ while <URL:ftp://foo:@host.com/> has a user name of "foo" and an
292
+ empty password.
293
+
294
+ host
295
+ The fully qualified domain name of a network host, or its IP
296
+ address as a set of four decimal digit groups separated by
297
+ ".". Fully qualified domain names take the form as described
298
+ in Section 3.5 of RFC 1034 [13] and Section 2.1 of RFC 1123
299
+ [5]: a sequence of domain labels separated by ".", each domain
300
+ label starting and ending with an alphanumerical character and
301
+ possibly also containing "-" characters. The rightmost domain
302
+ label will never start with a digit, though, which
303
+ syntactically distinguishes all domain names from the IP
304
+ addresses.
305
+
306
+ port
307
+ The port number to connect to. Most schemes designate
308
+ protocols that have a default port number. Another port number
309
+ may optionally be supplied, in decimal, separated from the
310
+ host by a colon. If the port is omitted, the colon is as well.
311
+
312
+ url-path
313
+ The rest of the locator consists of data specific to the
314
+ scheme, and is known as the "url-path". It supplies the
315
+ details of how the specified resource can be accessed. Note
316
+ that the "/" between the host (or port) and the url-path is
317
+ NOT part of the url-path.
318
+
319
+ The url-path syntax depends on the scheme being used, as does the
320
+ manner in which it is interpreted.
321
+
322
+ 3.2. FTP
323
+
324
+ The FTP URL scheme is used to designate files and directories on
325
+ Internet hosts accessible using the FTP protocol (RFC959).
326
+
327
+ A FTP URL follows the syntax described in Section 3.1. If :<port> is
328
+ omitted, the port defaults to 21.
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+ Berners-Lee, Masinter & McCahill [Page 6]
339
+
340
+ RFC 1738 Uniform Resource Locators (URL) December 1994
341
+
342
+
343
+ 3.2.1. FTP Name and Password
344
+
345
+ A user name and password may be supplied; they are used in the ftp
346
+ "USER" and "PASS" commands after first making the connection to the
347
+ FTP server. If no user name or password is supplied and one is
348
+ requested by the FTP server, the conventions for "anonymous" FTP are
349
+ to be used, as follows:
350
+
351
+ The user name "anonymous" is supplied.
352
+
353
+ The password is supplied as the Internet e-mail address
354
+ of the end user accessing the resource.
355
+
356
+ If the URL supplies a user name but no password, and the remote
357
+ server requests a password, the program interpreting the FTP URL
358
+ should request one from the user.
359
+
360
+ 3.2.2. FTP url-path
361
+
362
+ The url-path of a FTP URL has the following syntax:
363
+
364
+ <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
365
+
366
+ Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
367
+ and <typecode> is one of the characters "a", "i", or "d". The part
368
+ ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
369
+ empty. The whole url-path may be omitted, including the "/"
370
+ delimiting it from the prefix containing user, password, host, and
371
+ port.
372
+
373
+ The url-path is interpreted as a series of FTP commands as follows:
374
+
375
+ Each of the <cwd> elements is to be supplied, sequentially, as the
376
+ argument to a CWD (change working directory) command.
377
+
378
+ If the typecode is "d", perform a NLST (name list) command with
379
+ <name> as the argument, and interpret the results as a file
380
+ directory listing.
381
+
382
+ Otherwise, perform a TYPE command with <typecode> as the argument,
383
+ and then access the file whose name is <name> (for example, using
384
+ the RETR command.)
385
+
386
+ Within a name or CWD component, the characters "/" and ";" are
387
+ reserved and must be encoded. The components are decoded prior to
388
+ their use in the FTP protocol. In particular, if the appropriate FTP
389
+ sequence to access a particular file requires supplying a string
390
+ containing a "/" as an argument to a CWD or RETR command, it is
391
+
392
+
393
+
394
+ Berners-Lee, Masinter & McCahill [Page 7]
395
+
396
+ RFC 1738 Uniform Resource Locators (URL) December 1994
397
+
398
+
399
+ necessary to encode each "/".
400
+
401
+ For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
402
+ interpreted by FTP-ing to "host.dom", logging in as "myname"
403
+ (prompting for a password if it is asked for), and then executing
404
+ "CWD /etc" and then "RETR motd". This has a different meaning from
405
+ <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
406
+ "RETR motd"; the initial "CWD" might be executed relative to the
407
+ default directory for "myname". On the other hand,
408
+ <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
409
+ argument, then "CWD etc", and then "RETR motd".
410
+
411
+ FTP URLs may also be used for other operations; for example, it is
412
+ possible to update a file on a remote file server, or infer
413
+ information about it from the directory listings. The mechanism for
414
+ doing so is not spelled out here.
415
+
416
+ 3.2.3. FTP Typecode is Optional
417
+
418
+ The entire ;type=<typecode> part of a FTP URL is optional. If it is
419
+ omitted, the client program interpreting the URL must guess the
420
+ appropriate mode to use. In general, the data content type of a file
421
+ can only be guessed from the name, e.g., from the suffix of the name;
422
+ the appropriate type code to be used for transfer of the file can
423
+ then be deduced from the data content of the file.
424
+
425
+ 3.2.4 Hierarchy
426
+
427
+ For some file systems, the "/" used to denote the hierarchical
428
+ structure of the URL corresponds to the delimiter used to construct a
429
+ file name hierarchy, and thus, the filename will look similar to the
430
+ URL path. This does NOT mean that the URL is a Unix filename.
431
+
432
+ 3.2.5. Optimization
433
+
434
+ Clients accessing resources via FTP may employ additional heuristics
435
+ to optimize the interaction. For some FTP servers, for example, it
436
+ may be reasonable to keep the control connection open while accessing
437
+ multiple URLs from the same server. However, there is no common
438
+ hierarchical model to the FTP protocol, so if a directory change
439
+ command has been given, it is impossible in general to deduce what
440
+ sequence should be given to navigate to another directory for a
441
+ second retrieval, if the paths are different. The only reliable
442
+ algorithm is to disconnect and reestablish the control connection.
443
+
444
+
445
+
446
+
447
+
448
+
449
+
450
+ Berners-Lee, Masinter & McCahill [Page 8]
451
+
452
+ RFC 1738 Uniform Resource Locators (URL) December 1994
453
+
454
+
455
+ 3.3. HTTP
456
+
457
+ The HTTP URL scheme is used to designate Internet resources
458
+ accessible using HTTP (HyperText Transfer Protocol).
459
+
460
+ The HTTP protocol is specified elsewhere. This specification only
461
+ describes the syntax of HTTP URLs.
462
+
463
+ An HTTP URL takes the form:
464
+
465
+ http://<host>:<port>/<path>?<searchpart>
466
+
467
+ where <host> and <port> are as described in Section 3.1. If :<port>
468
+ is omitted, the port defaults to 80. No user name or password is
469
+ allowed. <path> is an HTTP selector, and <searchpart> is a query
470
+ string. The <path> is optional, as is the <searchpart> and its
471
+ preceding "?". If neither <path> nor <searchpart> is present, the "/"
472
+ may also be omitted.
473
+
474
+ Within the <path> and <searchpart> components, "/", ";", "?" are
475
+ reserved. The "/" character may be used within HTTP to designate a
476
+ hierarchical structure.
477
+
478
+ 3.4. GOPHER
479
+
480
+ The Gopher URL scheme is used to designate Internet resources
481
+ accessible using the Gopher protocol.
482
+
483
+ The base Gopher protocol is described in RFC 1436 and supports items
484
+ and collections of items (directories). The Gopher+ protocol is a set
485
+ of upward compatible extensions to the base Gopher protocol and is
486
+ described in [2]. Gopher+ supports associating arbitrary sets of
487
+ attributes and alternate data representations with Gopher items.
488
+ Gopher URLs accommodate both Gopher and Gopher+ items and item
489
+ attributes.
490
+
491
+ 3.4.1. Gopher URL syntax
492
+
493
+ A Gopher URL takes the form:
494
+
495
+ gopher://<host>:<port>/<gopher-path>
496
+
497
+ where <gopher-path> is one of
498
+
499
+ <gophertype><selector>
500
+ <gophertype><selector>%09<search>
501
+ <gophertype><selector>%09<search>%09<gopher+_string>
502
+
503
+
504
+
505
+
506
+ Berners-Lee, Masinter & McCahill [Page 9]
507
+
508
+ RFC 1738 Uniform Resource Locators (URL) December 1994
509
+
510
+
511
+ If :<port> is omitted, the port defaults to 70. <gophertype> is a
512
+ single-character field to denote the Gopher type of the resource to
513
+ which the URL refers. The entire <gopher-path> may also be empty, in
514
+ which case the delimiting "/" is also optional and the <gophertype>
515
+ defaults to "1".
516
+
517
+ <selector> is the Gopher selector string. In the Gopher protocol,
518
+ Gopher selector strings are a sequence of octets which may contain
519
+ any octets except 09 hexadecimal (US-ASCII HT or tab), 0A hexadecimal
520
+ (US-ASCII character LF), and 0D (US-ASCII character CR).
521
+
522
+ Gopher clients specify which item to retrieve by sending the Gopher
523
+ selector string to a Gopher server.
524
+
525
+ Within the <gopher-path>, no characters are reserved.
526
+
527
+ Note that some Gopher <selector> strings begin with a copy of the
528
+ <gophertype> character, in which case that character will occur twice
529
+ consecutively. The Gopher selector string may be an empty string;
530
+ this is how Gopher clients refer to the top-level directory on a
531
+ Gopher server.
532
+
533
+ 3.4.2 Specifying URLs for Gopher Search Engines
534
+
535
+ If the URL refers to a search to be submitted to a Gopher search
536
+ engine, the selector is followed by an encoded tab (%09) and the
537
+ search string. To submit a search to a Gopher search engine, the
538
+ Gopher client sends the <selector> string (after decoding), a tab,
539
+ and the search string to the Gopher server.
540
+
541
+ 3.4.3 URL syntax for Gopher+ items
542
+
543
+ URLs for Gopher+ items have a second encoded tab (%09) and a Gopher+
544
+ string. Note that in this case, the %09<search> string must be
545
+ supplied, although the <search> element may be the empty string.
546
+
547
+ The <gopher+_string> is used to represent information required for
548
+ retrieval of the Gopher+ item. Gopher+ items may have alternate
549
+ views, arbitrary sets of attributes, and may have electronic forms
550
+ associated with them.
551
+
552
+ To retrieve the data associated with a Gopher+ URL, a client will
553
+ connect to the server and send the Gopher selector, followed by a tab
554
+ and the search string (which may be empty), followed by a tab and the
555
+ Gopher+ commands.
556
+
557
+
558
+
559
+
560
+
561
+
562
+ Berners-Lee, Masinter & McCahill [Page 10]
563
+
564
+ RFC 1738 Uniform Resource Locators (URL) December 1994
565
+
566
+
567
+ 3.4.4 Default Gopher+ data representation
568
+
569
+ When a Gopher server returns a directory listing to a client, the
570
+ Gopher+ items are tagged with either a "+" (denoting Gopher+ items)
571
+ or a "?" (denoting Gopher+ items which have a +ASK form associated
572
+ with them). A Gopher URL with a Gopher+ string consisting of only a
573
+ "+" refers to the default view (data representation) of the item
574
+ while a Gopher+ string containing only a "?" refers to an item with a
575
+ Gopher electronic form associated with it.
576
+
577
+ 3.4.5 Gopher+ items with electronic forms
578
+
579
+ Gopher+ items which have a +ASK associated with them (i.e. Gopher+
580
+ items tagged with a "?") require the client to fetch the item's +ASK
581
+ attribute to get the form definition, and then ask the user to fill
582
+ out the form and return the user's responses along with the selector
583
+ string to retrieve the item. Gopher+ clients know how to do this but
584
+ depend on the "?" tag in the Gopher+ item description to know when to
585
+ handle this case. The "?" is used in the Gopher+ string to be
586
+ consistent with Gopher+ protocol's use of this symbol.
587
+
588
+ 3.4.6 Gopher+ item attribute collections
589
+
590
+ To refer to the Gopher+ attributes of an item, the Gopher URL's
591
+ Gopher+ string consists of "!" or "$". "!" refers to all of a
592
+ Gopher+ item's attributes. "$" refers to all the item attributes for
593
+ all items in a Gopher directory.
594
+
595
+ 3.4.7 Referring to specific Gopher+ attributes
596
+
597
+ To refer to specific attributes, the URL's gopher+_string is
598
+ "!<attribute_name>" or "$<attribute_name>". For example, to refer to
599
+ the attribute containing the abstract of an item, the gopher+_string
600
+ would be "!+ABSTRACT".
601
+
602
+ To refer to several attributes, the gopher+_string consists of the
603
+ attribute names separated by coded spaces. For example,
604
+ "!+ABSTRACT%20+SMELL" refers to the +ABSTRACT and +SMELL attributes
605
+ of an item.
606
+
607
+ 3.4.8 URL syntax for Gopher+ alternate views
608
+
609
+ Gopher+ allows for optional alternate data representations (alternate
610
+ views) of items. To retrieve a Gopher+ alternate view, a Gopher+
611
+ client sends the appropriate view and language identifier (found in
612
+ the item's +VIEW attribute). To refer to a specific Gopher+ alternate
613
+ view, the URL's Gopher+ string would be in the form:
614
+
615
+
616
+
617
+
618
+ Berners-Lee, Masinter & McCahill [Page 11]
619
+
620
+ RFC 1738 Uniform Resource Locators (URL) December 1994
621
+
622
+
623
+ +<view_name>%20<language_name>
624
+
625
+ For example, a Gopher+ string of "+application/postscript%20Es_ES"
626
+ refers to the Spanish language postscript alternate view of a Gopher+
627
+ item.
628
+
629
+ 3.4.9 URL syntax for Gopher+ electronic forms
630
+
631
+ The gopher+_string for a URL that refers to an item referenced by a
632
+ Gopher+ electronic form (an ASK block) filled out with specific
633
+ values is a coded version of what the client sends to the server.
634
+ The gopher+_string is of the form:
635
+
636
+ +%091%0D%0A+-1%0D%0A<ask_item1_value>%0D%0A<ask_item2_value>%0D%0A.%0D%0A
637
+
638
+ To retrieve this item, the Gopher client sends:
639
+
640
+ <a_gopher_selector><tab>+<tab>1<cr><lf>
641
+ +-1<cr><lf>
642
+ <ask_item1_value><cr><lf>
643
+ <ask_item2_value><cr><lf>
644
+ .<cr><lf>
645
+
646
+ to the Gopher server.
647
+
648
+ 3.5. MAILTO
649
+
650
+ The mailto URL scheme is used to designate the Internet mailing
651
+ address of an individual or service. No additional information other
652
+ than an Internet mailing address is present or implied.
653
+
654
+ A mailto URL takes the form:
655
+
656
+ mailto:<rfc822-addr-spec>
657
+
658
+ where <rfc822-addr-spec> is (the encoding of an) addr-spec, as
659
+ specified in RFC 822 [6]. Within mailto URLs, there are no reserved
660
+ characters.
661
+
662
+ Note that the percent sign ("%") is commonly used within RFC 822
663
+ addresses and must be encoded.
664
+
665
+ Unlike many URLs, the mailto scheme does not represent a data object
666
+ to be accessed directly; there is no sense in which it designates an
667
+ object. It has a different use than the message/external-body type in
668
+ MIME.
669
+
670
+
671
+
672
+
673
+
674
+ Berners-Lee, Masinter & McCahill [Page 12]
675
+
676
+ RFC 1738 Uniform Resource Locators (URL) December 1994
677
+
678
+
679
+ 3.6. NEWS
680
+
681
+ The news URL scheme is used to refer to either news groups or
682
+ individual articles of USENET news, as specified in RFC 1036.
683
+
684
+ A news URL takes one of two forms:
685
+
686
+ news:<newsgroup-name>
687
+ news:<message-id>
688
+
689
+ A <newsgroup-name> is a period-delimited hierarchical name, such as
690
+ "comp.infosystems.www.misc". A <message-id> corresponds to the
691
+ Message-ID of section 2.1.5 of RFC 1036, without the enclosing "<"
692
+ and ">"; it takes the form <unique>@<full_domain_name>. A message
693
+ identifier may be distinguished from a news group name by the
694
+ presence of the commercial at "@" character. No additional characters
695
+ are reserved within the components of a news URL.
696
+
697
+ If <newsgroup-name> is "*" (as in <URL:news:*>), it is used to refer
698
+ to "all available news groups".
699
+
700
+ The news URLs are unusual in that by themselves, they do not contain
701
+ sufficient information to locate a single resource, but, rather, are
702
+ location-independent.
703
+
704
+ 3.7. NNTP
705
+
706
+ The nntp URL scheme is an alternative method of referencing news
707
+ articles, useful for specifying news articles from NNTP servers (RFC
708
+ 977).
709
+
710
+ A nntp URL takes the form:
711
+
712
+ nntp://<host>:<port>/<newsgroup-name>/<article-number>
713
+
714
+ where <host> and <port> are as described in Section 3.1. If :<port>
715
+ is omitted, the port defaults to 119.
716
+
717
+ The <newsgroup-name> is the name of the group, while the <article-
718
+ number> is the numeric id of the article within that newsgroup.
719
+
720
+ Note that while nntp: URLs specify a unique location for the article
721
+ resource, most NNTP servers currently on the Internet today are
722
+ configured only to allow access from local clients, and thus nntp
723
+ URLs do not designate globally accessible resources. Thus, the news:
724
+ form of URL is preferred as a way of identifying news articles.
725
+
726
+
727
+
728
+
729
+
730
+ Berners-Lee, Masinter & McCahill [Page 13]
731
+
732
+ RFC 1738 Uniform Resource Locators (URL) December 1994
733
+
734
+
735
+ 3.8. TELNET
736
+
737
+ The Telnet URL scheme is used to designate interactive services that
738
+ may be accessed by the Telnet protocol.
739
+
740
+ A telnet URL takes the form:
741
+
742
+ telnet://<user>:<password>@<host>:<port>/
743
+
744
+ as specified in Section 3.1. The final "/" character may be omitted.
745
+ If :<port> is omitted, the port defaults to 23. The :<password> can
746
+ be omitted, as well as the whole <user>:<password> part.
747
+
748
+ This URL does not designate a data object, but rather an interactive
749
+ service. Remote interactive services vary widely in the means by
750
+ which they allow remote logins; in practice, the <user> and
751
+ <password> supplied are advisory only: clients accessing a telnet URL
752
+ merely advise the user of the suggested username and password.
753
+
754
+ 3.9. WAIS
755
+
756
+ The WAIS URL scheme is used to designate WAIS databases, searches, or
757
+ individual documents available from a WAIS database. WAIS is
758
+ described in [7]. The WAIS protocol is described in RFC 1625 [17];
759
+ although the WAIS protocol is based on Z39.50-1988, the WAIS URL
760
+ scheme is not intended for use with arbitrary Z39.50 services.
761
+
762
+ A WAIS URL takes one of the following forms:
763
+
764
+ wais://<host>:<port>/<database>
765
+ wais://<host>:<port>/<database>?<search>
766
+ wais://<host>:<port>/<database>/<wtype>/<wpath>
767
+
768
+ where <host> and <port> are as described in Section 3.1. If :<port>
769
+ is omitted, the port defaults to 210. The first form designates a
770
+ WAIS database that is available for searching. The second form
771
+ designates a particular search. <database> is the name of the WAIS
772
+ database being queried.
773
+
774
+ The third form designates a particular document within a WAIS
775
+ database to be retrieved. In this form <wtype> is the WAIS
776
+ designation of the type of the object. Many WAIS implementations
777
+ require that a client know the "type" of an object prior to
778
+ retrieval, the type being returned along with the internal object
779
+ identifier in the search response. The <wtype> is included in the
780
+ URL in order to allow the client interpreting the URL adequate
781
+ information to actually retrieve the document.
782
+
783
+
784
+
785
+
786
+ Berners-Lee, Masinter & McCahill [Page 14]
787
+
788
+ RFC 1738 Uniform Resource Locators (URL) December 1994
789
+
790
+
791
+ The <wpath> of a WAIS URL consists of the WAIS document-id, encoded
792
+ as necessary using the method described in Section 2.2. The WAIS
793
+ document-id should be treated opaquely; it may only be decomposed by
794
+ the server that issued it.
795
+
796
+ 3.10 FILES
797
+
798
+ The file URL scheme is used to designate files accessible on a
799
+ particular host computer. This scheme, unlike most other URL schemes,
800
+ does not designate a resource that is universally accessible over the
801
+ Internet.
802
+
803
+ A file URL takes the form:
804
+
805
+ file://<host>/<path>
806
+
807
+ where <host> is the fully qualified domain name of the system on
808
+ which the <path> is accessible, and <path> is a hierarchical
809
+ directory path of the form <directory>/<directory>/.../<name>.
810
+
811
+ For example, a VMS file
812
+
813
+ DISK$USER:[MY.NOTES]NOTE123456.TXT
814
+
815
+ might become
816
+
817
+ <URL:file://vms.host.edu/disk$user/my/notes/note123456.txt>
818
+
819
+ As a special case, <host> can be the string "localhost" or the empty
820
+ string; this is interpreted as `the machine from which the URL is
821
+ being interpreted'.
822
+
823
+ The file URL scheme is unusual in that it does not specify an
824
+ Internet protocol or access method for such files; as such, its
825
+ utility in network protocols between hosts is limited.
826
+
827
+ 3.11 PROSPERO
828
+
829
+ The Prospero URL scheme is used to designate resources that are
830
+ accessed via the Prospero Directory Service. The Prospero protocol is
831
+ described elsewhere [14].
832
+
833
+ A prospero URL takes the form:
834
+
835
+ prospero://<host>:<port>/<hsoname>;<field>=<value>
836
+
837
+ where <host> and <port> are as described in Section 3.1. If :<port>
838
+ is omitted, the port defaults to 1525. No username or password is
839
+
840
+
841
+
842
+ Berners-Lee, Masinter & McCahill [Page 15]
843
+
844
+ RFC 1738 Uniform Resource Locators (URL) December 1994
845
+
846
+
847
+ allowed.
848
+
849
+ The <hsoname> is the host-specific object name in the Prospero
850
+ protocol, suitably encoded. This name is opaque and interpreted by
851
+ the Prospero server. The semicolon ";" is reserved and may not
852
+ appear without quoting in the <hsoname>.
853
+
854
+ Prospero URLs are interpreted by contacting a Prospero directory
855
+ server on the specified host and port to determine appropriate access
856
+ methods for a resource, which might themselves be represented as
857
+ different URLs. External Prospero links are represented as URLs of
858
+ the underlying access method and are not represented as Prospero
859
+ URLs.
860
+
861
+ Note that a slash "/" may appear in the <hsoname> without quoting and
862
+ no significance may be assumed by the application. Though slashes
863
+ may indicate hierarchical structure on the server, such structure is
864
+ not guaranteed. Note that many <hsoname>s begin with a slash, in
865
+ which case the host or port will be followed by a double slash: the
866
+ slash from the URL syntax, followed by the initial slash from the
867
+ <hsoname>. (E.g., <URL:prospero://host.dom//pros/name> designates a
868
+ <hsoname> of "/pros/name".)
869
+
870
+ In addition, after the <hsoname>, optional fields and values
871
+ associated with a Prospero link may be specified as part of the URL.
872
+ When present, each field/value pair is separated from each other and
873
+ from the rest of the URL by a ";" (semicolon). The name of the field
874
+ and its value are separated by a "=" (equal sign). If present, these
875
+ fields serve to identify the target of the URL. For example, the
876
+ OBJECT-VERSION field can be specified to identify a specific version
877
+ of an object.
878
+
879
+ 4. REGISTRATION OF NEW SCHEMES
880
+
881
+ A new scheme may be introduced by defining a mapping onto a
882
+ conforming URL syntax, using a new prefix. URLs for experimental
883
+ schemes may be used by mutual agreement between parties. Scheme names
884
+ starting with the characters "x-" are reserved for experimental
885
+ purposes.
886
+
887
+ The Internet Assigned Numbers Authority (IANA) will maintain a
888
+ registry of URL schemes. Any submission of a new URL scheme must
889
+ include a definition of an algorithm for accessing of resources
890
+ within that scheme and the syntax for representing such a scheme.
891
+
892
+ URL schemes must have demonstrable utility and operability. One way
893
+ to provide such a demonstration is via a gateway which provides
894
+ objects in the new scheme for clients using an existing protocol. If
895
+
896
+
897
+
898
+ Berners-Lee, Masinter & McCahill [Page 16]
899
+
900
+ RFC 1738 Uniform Resource Locators (URL) December 1994
901
+
902
+
903
+ the new scheme does not locate resources that are data objects, the
904
+ properties of names in the new space must be clearly defined.
905
+
906
+ New schemes should try to follow the same syntactic conventions of
907
+ existing schemes, where appropriate. It is likewise recommended
908
+ that, where a protocol allows for retrieval by URL, that the client
909
+ software have provision for being configured to use specific gateway
910
+ locators for indirect access through new naming schemes.
911
+
912
+ The following schemes have been proposed at various times, but this
913
+ document does not define their syntax or use at this time. It is
914
+ suggested that IANA reserve their scheme names for future definition:
915
+
916
+ afs Andrew File System global file names.
917
+ mid Message identifiers for electronic mail.
918
+ cid Content identifiers for MIME body parts.
919
+ nfs Network File System (NFS) file names.
920
+ tn3270 Interactive 3270 emulation sessions.
921
+ mailserver Access to data available from mail servers.
922
+ z39.50 Access to ANSI Z39.50 services.
923
+
924
+ 5. BNF for specific URL schemes
925
+
926
+ This is a BNF-like description of the Uniform Resource Locator
927
+ syntax, using the conventions of RFC822, except that "|" is used to
928
+ designate alternatives, and brackets [] are used around optional or
929
+ repeated elements. Briefly, literals are quoted with "", optional
930
+ elements are enclosed in [brackets], and elements may be preceded
931
+ with <n>* to designate n or more repetitions of the following
932
+ element; n defaults to 0.
933
+
934
+ ; The generic form of a URL is:
935
+
936
+ genericurl = scheme ":" schemepart
937
+
938
+ ; Specific predefined schemes are defined here; new schemes
939
+ ; may be registered with IANA
940
+
941
+ url = httpurl | ftpurl | newsurl |
942
+ nntpurl | telneturl | gopherurl |
943
+ waisurl | mailtourl | fileurl |
944
+ prosperourl | otherurl
945
+
946
+ ; new schemes follow the general syntax
947
+ otherurl = genericurl
948
+
949
+ ; the scheme is in lower case; interpreters should use case-ignore
950
+ scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
951
+
952
+
953
+
954
+ Berners-Lee, Masinter & McCahill [Page 17]
955
+
956
+ RFC 1738 Uniform Resource Locators (URL) December 1994
957
+
958
+
959
+ schemepart = *xchar | ip-schemepart
960
+
961
+
962
+ ; URL schemeparts for ip based protocols:
963
+
964
+ ip-schemepart = "//" login [ "/" urlpath ]
965
+
966
+ login = [ user [ ":" password ] "@" ] hostport
967
+ hostport = host [ ":" port ]
968
+ host = hostname | hostnumber
969
+ hostname = *[ domainlabel "." ] toplabel
970
+ domainlabel = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
971
+ toplabel = alpha | alpha *[ alphadigit | "-" ] alphadigit
972
+ alphadigit = alpha | digit
973
+ hostnumber = digits "." digits "." digits "." digits
974
+ port = digits
975
+ user = *[ uchar | ";" | "?" | "&" | "=" ]
976
+ password = *[ uchar | ";" | "?" | "&" | "=" ]
977
+ urlpath = *xchar ; depends on protocol see section 3.1
978
+
979
+ ; The predefined schemes:
980
+
981
+ ; FTP (see also RFC959)
982
+
983
+ ftpurl = "ftp://" login [ "/" fpath [ ";type=" ftptype ]]
984
+ fpath = fsegment *[ "/" fsegment ]
985
+ fsegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
986
+ ftptype = "A" | "I" | "D" | "a" | "i" | "d"
987
+
988
+ ; FILE
989
+
990
+ fileurl = "file://" [ host | "localhost" ] "/" fpath
991
+
992
+ ; HTTP
993
+
994
+ httpurl = "http://" hostport [ "/" hpath [ "?" search ]]
995
+ hpath = hsegment *[ "/" hsegment ]
996
+ hsegment = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
997
+ search = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
998
+
999
+ ; GOPHER (see also RFC1436)
1000
+
1001
+ gopherurl = "gopher://" hostport [ "/" [ gtype [ selector
1002
+ [ "%09" search [ "%09" gopher+_string ] ] ] ] ]
1003
+ gtype = xchar
1004
+ selector = *xchar
1005
+ gopher+_string = *xchar
1006
+
1007
+
1008
+
1009
+
1010
+ Berners-Lee, Masinter & McCahill [Page 18]
1011
+
1012
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1013
+
1014
+
1015
+ ; MAILTO (see also RFC822)
1016
+
1017
+ mailtourl = "mailto:" encoded822addr
1018
+ encoded822addr = 1*xchar ; further defined in RFC822
1019
+
1020
+ ; NEWS (see also RFC1036)
1021
+
1022
+ newsurl = "news:" grouppart
1023
+ grouppart = "*" | group | article
1024
+ group = alpha *[ alpha | digit | "-" | "." | "+" | "_" ]
1025
+ article = 1*[ uchar | ";" | "/" | "?" | ":" | "&" | "=" ] "@" host
1026
+
1027
+ ; NNTP (see also RFC977)
1028
+
1029
+ nntpurl = "nntp://" hostport "/" group [ "/" digits ]
1030
+
1031
+ ; TELNET
1032
+
1033
+ telneturl = "telnet://" login [ "/" ]
1034
+
1035
+ ; WAIS (see also RFC1625)
1036
+
1037
+ waisurl = waisdatabase | waisindex | waisdoc
1038
+ waisdatabase = "wais://" hostport "/" database
1039
+ waisindex = "wais://" hostport "/" database "?" search
1040
+ waisdoc = "wais://" hostport "/" database "/" wtype "/" wpath
1041
+ database = *uchar
1042
+ wtype = *uchar
1043
+ wpath = *uchar
1044
+
1045
+ ; PROSPERO
1046
+
1047
+ prosperourl = "prospero://" hostport "/" ppath *[ fieldspec ]
1048
+ ppath = psegment *[ "/" psegment ]
1049
+ psegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
1050
+ fieldspec = ";" fieldname "=" fieldvalue
1051
+ fieldname = *[ uchar | "?" | ":" | "@" | "&" ]
1052
+ fieldvalue = *[ uchar | "?" | ":" | "@" | "&" ]
1053
+
1054
+ ; Miscellaneous definitions
1055
+
1056
+ lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
1057
+ "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
1058
+ "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
1059
+ "y" | "z"
1060
+ hialpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
1061
+ "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
1062
+ "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
1063
+
1064
+
1065
+
1066
+ Berners-Lee, Masinter & McCahill [Page 19]
1067
+
1068
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1069
+
1070
+
1071
+ alpha = lowalpha | hialpha
1072
+ digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
1073
+ "8" | "9"
1074
+ safe = "$" | "-" | "_" | "." | "+"
1075
+ extra = "!" | "*" | "'" | "(" | ")" | ","
1076
+ national = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
1077
+ punctuation = "<" | ">" | "#" | "%" | <">
1078
+
1079
+
1080
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="
1081
+ hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
1082
+ "a" | "b" | "c" | "d" | "e" | "f"
1083
+ escape = "%" hex hex
1084
+
1085
+ unreserved = alpha | digit | safe | extra
1086
+ uchar = unreserved | escape
1087
+ xchar = unreserved | reserved | escape
1088
+ digits = 1*digit
1089
+
1090
+ 6. Security Considerations
1091
+
1092
+ The URL scheme does not in itself pose a security threat. Users
1093
+ should beware that there is no general guarantee that a URL which at
1094
+ one time points to a given object continues to do so, and does not
1095
+ even at some later time point to a different object due to the
1096
+ movement of objects on servers.
1097
+
1098
+ A URL-related security threat is that it is sometimes possible to
1099
+ construct a URL such that an attempt to perform a harmless idempotent
1100
+ operation such as the retrieval of the object will in fact cause a
1101
+ possibly damaging remote operation to occur. The unsafe URL is
1102
+ typically constructed by specifying a port number other than that
1103
+ reserved for the network protocol in question. The client
1104
+ unwittingly contacts a server which is in fact running a different
1105
+ protocol. The content of the URL contains instructions which when
1106
+ interpreted according to this other protocol cause an unexpected
1107
+ operation. An example has been the use of gopher URLs to cause a rude
1108
+ message to be sent via an SMTP server. Caution should be used when
1109
+ using any URL which specifies a port number other than the default
1110
+ for the protocol, especially when it is a number within the reserved
1111
+ space.
1112
+
1113
+ Care should be taken when URLs contain embedded encoded delimiters
1114
+ for a given protocol (for example, CR and LF characters for telnet
1115
+ protocols) that these are not unencoded before transmission. This
1116
+ would violate the protocol but could be used to simulate an extra
1117
+ operation or parameter, again causing an unexpected and possibly
1118
+ harmful remote operation to be performed.
1119
+
1120
+
1121
+
1122
+ Berners-Lee, Masinter & McCahill [Page 20]
1123
+
1124
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1125
+
1126
+
1127
+ The use of URLs containing passwords that should be secret is clearly
1128
+ unwise.
1129
+
1130
+ 7. Acknowledgements
1131
+
1132
+ This paper builds on the basic WWW design (RFC 1630) and much
1133
+ discussion of these issues by many people on the network. The
1134
+ discussion was particularly stimulated by articles by Clifford Lynch,
1135
+ Brewster Kahle [10] and Wengyik Yeong [18]. Contributions from John
1136
+ Curran, Clifford Neuman, Ed Vielmetti and later the IETF URL BOF and
1137
+ URI working group were incorporated.
1138
+
1139
+ Most recently, careful readings and comments by Dan Connolly, Ned
1140
+ Freed, Roy Fielding, Guido van Rossum, Michael Dolan, Bert Bos, John
1141
+ Kunze, Olle Jarnefors, Peter Svanberg and many others have helped
1142
+ refine this RFC.
1143
+
1144
+
1145
+
1146
+
1147
+
1148
+
1149
+
1150
+
1151
+
1152
+
1153
+
1154
+
1155
+
1156
+
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
1169
+
1170
+
1171
+
1172
+
1173
+
1174
+
1175
+
1176
+
1177
+
1178
+ Berners-Lee, Masinter & McCahill [Page 21]
1179
+
1180
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1181
+
1182
+
1183
+ APPENDIX: Recommendations for URLs in Context
1184
+
1185
+ URIs, including URLs, are intended to be transmitted through
1186
+ protocols which provide a context for their interpretation.
1187
+
1188
+ In some cases, it will be necessary to distinguish URLs from other
1189
+ possible data structures in a syntactic structure. In this case, it is
1190
+ recommended that URLs be preceded with a prefix consisting of the
1191
+ characters "URL:". For example, this prefix may be used to
1192
+ distinguish URLs from other kinds of URIs.
1193
+
1194
+ In addition, there are many occasions when URLs are included in other
1195
+ kinds of text; examples include electronic mail, USENET news
1196
+ messages, or printed on paper. In such cases, it is convenient to
1197
+ have a separate syntactic wrapper that delimits the URL and separates
1198
+ it from the rest of the text, and in particular from punctuation
1199
+ marks that might be mistaken for part of the URL. For this purpose,
1200
+ it is recommended that angle brackets ("<" and ">"), along with the
1201
+ prefix "URL:", be used to delimit the boundaries of the URL. This
1202
+ wrapper does not form part of the URL and should not be used in
1203
+ contexts in which delimiters are already specified.
1204
+
1205
+ In the case where a fragment/anchor identifier is associated with a
1206
+ URL (following a "#"), the identifier would be placed within the
1207
+ brackets as well.
1208
+
1209
+ In some cases, extra whitespace (spaces, linebreaks, tabs, etc.) may
1210
+ need to be added to break long URLs across lines. The whitespace
1211
+ should be ignored when extracting the URL.
1212
+
1213
+ No whitespace should be introduced after a hyphen ("-") character.
1214
+ Because some typesetters and printers may (erroneously) introduce a
1215
+ hyphen at the end of line when breaking a line, the interpreter of a
1216
+ URL containing a line break immediately after a hyphen should ignore
1217
+ all unencoded whitespace around the line break, and should be aware
1218
+ that the hyphen may or may not actually be part of the URL.
1219
+
1220
+ Examples:
1221
+
1222
+ Yes, Jim, I found it under <URL:ftp://info.cern.ch/pub/www/doc;
1223
+ type=d> but you can probably pick it up from <URL:ftp://ds.in
1224
+ ternic.net/rfc>. Note the warning in <URL:http://ds.internic.
1225
+ net/instructions/overview.html#WARNING>.
1226
+
1227
+
1228
+
1229
+
1230
+
1231
+
1232
+
1233
+
1234
+ Berners-Lee, Masinter & McCahill [Page 22]
1235
+
1236
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1237
+
1238
+
1239
+ References
1240
+
1241
+ [1] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D.,
1242
+ Torrey, D., and B. Alberti, "The Internet Gopher Protocol
1243
+ (a distributed document search and retrieval protocol)",
1244
+ RFC 1436, University of Minnesota, March 1993.
1245
+ <URL:ftp://ds.internic.net/rfc/rfc1436.txt;type=a>
1246
+
1247
+ [2] Anklesaria, F., Lindner, P., McCahill, M., Torrey, D.,
1248
+ Johnson, D., and B. Alberti, "Gopher+: Upward compatible
1249
+ enhancements to the Internet Gopher protocol",
1250
+ University of Minnesota, July 1993.
1251
+ <URL:ftp://boombox.micro.umn.edu/pub/gopher/gopher_protocol
1252
+ /Gopher+/Gopher+.txt>
1253
+
1254
+ [3] Berners-Lee, T., "Universal Resource Identifiers in WWW: A
1255
+ Unifying Syntax for the Expression of Names and Addresses of
1256
+ Objects on the Network as used in the World-Wide Web", RFC
1257
+ 1630, CERN, June 1994.
1258
+ <URL:ftp://ds.internic.net/rfc/rfc1630.txt>
1259
+
1260
+ [4] Berners-Lee, T., "Hypertext Transfer Protocol (HTTP)",
1261
+ CERN, November 1993.
1262
+ <URL:ftp://info.cern.ch/pub/www/doc/http-spec.txt.Z>
1263
+
1264
+ [5] Braden, R., Editor, "Requirements for Internet Hosts --
1265
+ Application and Support", STD 3, RFC 1123, IETF, October 1989.
1266
+ <URL:ftp://ds.internic.net/rfc/rfc1123.txt>
1267
+
1268
+ [6] Crocker, D., "Standard for the Format of ARPA Internet Text
1269
+ Messages", STD 11, RFC 822, UDEL, April 1982.
1270
+ <URL:ftp://ds.internic.net/rfc/rfc822.txt>
1271
+
1272
+ [7] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R.,
1273
+ Sui, J., and M. Grinbaum, "WAIS Interface Protocol Prototype
1274
+ Functional Specification", (v1.5), Thinking Machines
1275
+ Corporation, April 1990.
1276
+ <URL:ftp://quake.think.com/pub/wais/doc/protspec.txt>
1277
+
1278
+ [8] Horton, M. and R. Adams, "Standard For Interchange of USENET
1279
+ Messages", RFC 1036, AT&T Bell Laboratories, Center for Seismic
1280
+ Studies, December 1987.
1281
+ <URL:ftp://ds.internic.net/rfc/rfc1036.txt>
1282
+
1283
+ [9] Huitema, C., "Naming: Strategies and Techniques", Computer
1284
+ Networks and ISDN Systems 23 (1991) 107-110.
1285
+
1286
+
1287
+
1288
+
1289
+
1290
+ Berners-Lee, Masinter & McCahill [Page 23]
1291
+
1292
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1293
+
1294
+
1295
+ [10] Kahle, B., "Document Identifiers, or International Standard
1296
+ Book Numbers for the Electronic Age", 1991.
1297
+ <URL:ftp://quake.think.com/pub/wais/doc/doc-ids.txt>
1298
+
1299
+ [11] Kantor, B. and P. Lapsley, "Network News Transfer Protocol:
1300
+ A Proposed Standard for the Stream-Based Transmission of News",
1301
+ RFC 977, UC San Diego & UC Berkeley, February 1986.
1302
+ <URL:ftp://ds.internic.net/rfc/rfc977.txt>
1303
+
1304
+ [12] Kunze, J., "Functional Requirements for Internet Resource
1305
+ Locators", Work in Progress, December 1994.
1306
+ <URL:ftp://ds.internic.net/internet-drafts
1307
+ /draft-ietf-uri-irl-fun-req-02.txt>
1308
+
1309
+ [13] Mockapetris, P., "Domain Names - Concepts and Facilities",
1310
+ STD 13, RFC 1034, USC/Information Sciences Institute,
1311
+ November 1987.
1312
+ <URL:ftp://ds.internic.net/rfc/rfc1034.txt>
1313
+
1314
+ [14] Neuman, B., and S. Augart, "The Prospero Protocol",
1315
+ USC/Information Sciences Institute, June 1993.
1316
+ <URL:ftp://prospero.isi.edu/pub/prospero/doc
1317
+ /prospero-protocol.PS.Z>
1318
+
1319
+ [15] Postel, J. and J. Reynolds, "File Transfer Protocol (FTP)",
1320
+ STD 9, RFC 959, USC/Information Sciences Institute,
1321
+ October 1985.
1322
+ <URL:ftp://ds.internic.net/rfc/rfc959.txt>
1323
+
1324
+ [16] Sollins, K. and L. Masinter, "Functional Requirements for
1325
+ Uniform Resource Names", RFC 1737, MIT/LCS, Xerox Corporation,
1326
+ December 1994.
1327
+ <URL:ftp://ds.internic.net/rfc/rfc1737.txt>
1328
+
1329
+ [17] St. Pierre, M., Fullton, J., Gamiel, K., Goldman, J., Kahle, B.,
1330
+ Kunze, J., Morris, H., and F. Schiettecatte, "WAIS over
1331
+ Z39.50-1988", RFC 1625, WAIS, Inc., CNIDR, Thinking Machines
1332
+ Corp., UC Berkeley, FS Consulting, June 1994.
1333
+ <URL:ftp://ds.internic.net/rfc/rfc1625.txt>
1334
+
1335
+ [18] Yeong, W., "Towards Networked Information Retrieval", Technical
1336
+ report 91-06-25-01, Performance Systems International, Inc.
1337
+ <URL:ftp://uu.psi.com/wp/nir.txt>, June 1991.
1338
+
1339
+ [19] Yeong, W., "Representing Public Archives in the Directory",
1340
+ Work in Progress, November 1991.
1341
+
1342
+
1343
+
1344
+
1345
+
1346
+ Berners-Lee, Masinter & McCahill [Page 24]
1347
+
1348
+ RFC 1738 Uniform Resource Locators (URL) December 1994
1349
+
1350
+
1351
+ [20] "Coded Character Set -- 7-bit American Standard Code for
1352
+ Information Interchange", ANSI X3.4-1986.
1353
+
1354
+ Editors' Addresses
1355
+
1356
+ Tim Berners-Lee
1357
+ World-Wide Web project
1358
+ CERN,
1359
+ 1211 Geneva 23,
1360
+ Switzerland
1361
+
1362
+ Phone: +41 (22)767 3755
1363
+ Fax: +41 (22)767 7155
1364
+ EMail: timbl@info.cern.ch
1365
+
1366
+
1367
+ Larry Masinter
1368
+ Xerox PARC
1369
+ 3333 Coyote Hill Road
1370
+ Palo Alto, CA 94034
1371
+
1372
+ Phone: (415) 812-4365
1373
+ Fax: (415) 812-4333
1374
+ EMail: masinter@parc.xerox.com
1375
+
1376
+
1377
+ Mark McCahill
1378
+ Computer and Information Services,
1379
+ University of Minnesota
1380
+ Room 152 Shepherd Labs
1381
+ 100 Union Street SE
1382
+ Minneapolis, MN 55455
1383
+
1384
+ Phone: (612) 625 1300
1385
+ EMail: mpm@boombox.micro.umn.edu
1386
+
1387
+
1388
+
1389
+
1390
+
1391
+
1392
+
1393
+
1394
+
1395
+
1396
+
1397
+
1398
+
1399
+
1400
+
1401
+
1402
+ Berners-Lee, Masinter & McCahill [Page 25]
1403
+