uri_pathname 0.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +26 -0
- data/LICENSE +20 -0
- data/README.rdoc +150 -0
- data/Rakefile +60 -0
- data/examples/simple_examples.rb +44 -0
- data/examples/using_fakeweb.rb +66 -0
- data/lib/uri_pathname/version.rb +16 -0
- data/lib/uri_pathname.rb +141 -0
- data/test/test_uri_pathname.rb +58 -0
- data/uri_pathname.gemspec +52 -0
- metadata +82 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
*.gem
|
23
|
+
examples/pr_*
|
24
|
+
|
25
|
+
## NETBEANS
|
26
|
+
nbproject/*
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Marcel Massana
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
= uri_pathname
|
2
|
+
|
3
|
+
UriPathname eases the conversions between URIs and unique valid pathnames. This
|
4
|
+
feature might be useful, for instance, when:
|
5
|
+
|
6
|
+
* Web Spidering: You want to save webpages to files, or save all their contents within a directory, or only some scraped data, ... and don't know how to name them. UriPathname can easily assign unique, valid names to files or directories from already known URIs, combining scheme, hostname, path and query.
|
7
|
+
|
8
|
+
* Web Stubbing / Testing: You need to retrieve previously saved webpages by means of their URIs. UriPathname guesses the pathname from a given URI, then you can use, for instance, your File.read to read that file/page.
|
9
|
+
|
10
|
+
== Installation
|
11
|
+
|
12
|
+
$ sudo gem install uri_pathname
|
13
|
+
|
14
|
+
The main source repository is http://github.com/syborg/uri_pathname.
|
15
|
+
|
16
|
+
== Examples
|
17
|
+
|
18
|
+
(see examples directory)
|
19
|
+
|
20
|
+
=== First of all
|
21
|
+
|
22
|
+
Require at least ...
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'uri_pathname'
|
26
|
+
|
27
|
+
=== Generate pathnames from URIs
|
28
|
+
|
29
|
+
These examples are useful when web spidering. You will need to generate a
|
30
|
+
pathname given an URI.
|
31
|
+
|
32
|
+
# From URI with path and query
|
33
|
+
puts up.uri_to_pathname("http://www.fake.fak/path1/path2?query")
|
34
|
+
# => www.fake.fak__|path1_|_path2?query(http)
|
35
|
+
|
36
|
+
# From URI without path and query
|
37
|
+
puts up.uri_to_pathname("http://www.fake.fak")
|
38
|
+
# => www.fake.fak__|_NOPATH_(http)
|
39
|
+
|
40
|
+
# Fragments or other URI parts are silently ignored
|
41
|
+
puts up.uri_to_pathname('http://donaldfagen.com/disc_nightfly.php#rubybaby')
|
42
|
+
# => donaldfagen.com__|disc_nightfly.php(http)
|
43
|
+
|
44
|
+
# If a directory is given as a second arg, pathname will be prepended with that
|
45
|
+
# expanded directory
|
46
|
+
puts up.uri_to_pathname("http://www.fake.fak", "~/my_webdumps")
|
47
|
+
# => /home/marcel/my_webdumps/www.fake.fak__|_NOPATH_(http)
|
48
|
+
|
49
|
+
# Also a third argument can be given and will be appended as an extension
|
50
|
+
puts up.uri_to_pathname("http://www.fake.fak/path","", ".html")
|
51
|
+
# => www.fake.fak__|path(http).html
|
52
|
+
|
53
|
+
=== Recovering URIs from (correct) pathnames
|
54
|
+
|
55
|
+
When stubbing with tools like fakeweb, you can use reverse conversion to
|
56
|
+
register fake accesses to real URIs.
|
57
|
+
|
58
|
+
# pathnames can be parsed if correctly generated as above examples (see docs)
|
59
|
+
p up.parse('/home/marcel/my_webdumps/www.fake.fak__|path1_|_path2?query(http).html.gz')
|
60
|
+
|
61
|
+
# URI can be also retrieved from correct (Uri)pathnames
|
62
|
+
puts up.pathname_to_uri('/home/marcel/my_webdumps/www.fake.fak__|_NOPATH_(http).html.gz')
|
63
|
+
# => http://www.fake.fak
|
64
|
+
puts up.pathname_to_uri('www.fake.fak__|path?query(http).html.gz')
|
65
|
+
# => http://www.fake.fak/path?query
|
66
|
+
|
67
|
+
=== Web Spidering / Dumping / Stubbing
|
68
|
+
|
69
|
+
This example shows how to use UriPathname to assign names to files and also
|
70
|
+
to register those files in order to stub real accesses later.
|
71
|
+
|
72
|
+
require 'rubygems'
|
73
|
+
require 'fileutils'
|
74
|
+
require 'open-uri'
|
75
|
+
require 'uri_pathname'
|
76
|
+
require 'fakeweb' # gem install fakeweb
|
77
|
+
|
78
|
+
# put here whatever temporary directory name to use
|
79
|
+
MY_DIR=File.expand_path '~/my_dumps'
|
80
|
+
# put here whatever URIs u want to access
|
81
|
+
MY_URIS = [
|
82
|
+
'http://en.wikipedia.org/wiki/Ruby_Bridges',
|
83
|
+
'http://donaldfagen.com/disc_nightfly.php',
|
84
|
+
'http://www.rubi.cat/ajrubi/portada/index.php',
|
85
|
+
'http://www.google.com/cse?q=array&cx=013598269713424429640%3Ag5orptiw95w&ie=UTF-8&sa=Search'
|
86
|
+
]
|
87
|
+
|
88
|
+
# some convenient defs
|
89
|
+
def prepare_example
|
90
|
+
FileUtils.mkdir_p(MY_DIR) unless File.directory?(MY_DIR)
|
91
|
+
end
|
92
|
+
|
93
|
+
# preparation (comment this if you've already got your test dir)
|
94
|
+
prepare_example
|
95
|
+
|
96
|
+
up = UriPathname.new
|
97
|
+
|
98
|
+
# 1st round: Capture MY_URIS, and save them with the appropriate UriPathname
|
99
|
+
puts "1- Capturing URIs"
|
100
|
+
data = nil
|
101
|
+
sizes = []
|
102
|
+
MY_URIS.each do |uri|
|
103
|
+
open uri do |u|
|
104
|
+
data=u.read
|
105
|
+
pathname = up.uri_to_pathname(uri,MY_DIR,".html")
|
106
|
+
File.open(pathname,'w') do |f|
|
107
|
+
f.write data
|
108
|
+
sizes << data.size
|
109
|
+
puts "SAVED #{uri} : #{data.size} bytes"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
# 2nd round: checking saved files and preparing
|
115
|
+
puts "\n2- CHECKING CAPTURED FILES AND PREPARING FAKE WEB ACCESSES"
|
116
|
+
FakeWeb.allow_net_connect=false
|
117
|
+
Dir[File.join(MY_DIR,"*")].each do |name|
|
118
|
+
uri = up.pathname_to_uri name
|
119
|
+
FakeWeb.register_uri :any, uri, :body=>name, :content_type=>"text/html"
|
120
|
+
puts "#{name}\n\tcorresponds to #{uri}"
|
121
|
+
end
|
122
|
+
|
123
|
+
# 3rd round: Access Web without actually accessing web
|
124
|
+
puts "\n3- FAKE WEB ACCESSES"
|
125
|
+
MY_URIS.each_with_index do |uri,i|
|
126
|
+
open uri do |u|
|
127
|
+
data=u.read
|
128
|
+
puts "FAKE #{uri} ACCESS #{(data.size == sizes[i]) ? 'OK' : 'KO'}: #{data.size} bytes"
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
== Release Notes
|
133
|
+
|
134
|
+
* At present, UriPathname uses only some parts of an URI (scheme, hostname, path and query) to generate a valid and unique pathname that can be backconverted to URI. Port, User and other URI features are not yet used. I haven't had the necessity to include them too ;-).
|
135
|
+
|
136
|
+
* Only Linux pathnames have been taken into account. I don't know if UriPathname will generate correct Windows or OSX pathnames, for instance. Test it and feel free to collaborate.
|
137
|
+
|
138
|
+
* This is a very early release. I haven't had the time to study and prepare tests; nonetheless, some examples will become tests in the future.
|
139
|
+
|
140
|
+
== Note on Patches/Pull Requests
|
141
|
+
|
142
|
+
* Fork the project.
|
143
|
+
* Make your feature addition or bug fix.
|
144
|
+
* Add tests for it. This is important so I don't break it in a future version unintentionally.
|
145
|
+
* Commit, do not mess with rakefile, version, or history. (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
146
|
+
* Send me a pull request. Bonus points for topic branches.
|
147
|
+
|
148
|
+
== Copyright
|
149
|
+
|
150
|
+
Copyright (c) 2011 Marcel Massana. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
require './lib/uri_pathname/version'
|
7
|
+
Jeweler::Tasks.new do |gem|
|
8
|
+
gem.name = "uri_pathname"
|
9
|
+
gem.summary = %Q{Simple converter between URIs and Pathnames}
|
10
|
+
gem.description = %Q{Simple converter between URIs and Pathnames.
|
11
|
+
It creates valid, unique and readable filenames from URIs and
|
12
|
+
viceversa. It can be used to name files while saving data from
|
13
|
+
websites and conversely, read files assigned to URIs while, for
|
14
|
+
instance, simulating or stubbing web accesses by means of reading
|
15
|
+
files}.gsub(/\s+/,' ')
|
16
|
+
gem.email = "xaxaupua@gmail.com"
|
17
|
+
gem.homepage = "http://github.com/syborg/uri_pathname"
|
18
|
+
gem.authors = ["Marcel Massana"]
|
19
|
+
# gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
20
|
+
gem.version = UriPathname::Version::STRING
|
21
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
22
|
+
end
|
23
|
+
Jeweler::GemcutterTasks.new
|
24
|
+
rescue LoadError
|
25
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
26
|
+
end
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
begin
|
36
|
+
require 'rcov/rcovtask'
|
37
|
+
Rcov::RcovTask.new do |test|
|
38
|
+
test.libs << 'test'
|
39
|
+
test.pattern = 'test/**/test_*.rb'
|
40
|
+
test.verbose = true
|
41
|
+
end
|
42
|
+
rescue LoadError
|
43
|
+
task :rcov do
|
44
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
task :test => :check_dependencies
|
49
|
+
|
50
|
+
task :default => :test
|
51
|
+
|
52
|
+
require 'rake/rdoctask'
|
53
|
+
Rake::RDocTask.new do |rdoc|
|
54
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
55
|
+
|
56
|
+
rdoc.rdoc_dir = 'rdoc'
|
57
|
+
rdoc.title = "uri_pathname #{version}"
|
58
|
+
rdoc.rdoc_files.include('README*')
|
59
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# simple_examples
|
2
|
+
# MME 26/8/2011
|
3
|
+
|
4
|
+
require 'uri_pathname'
|
5
|
+
|
6
|
+
up = UriPathname.new
|
7
|
+
|
8
|
+
# GENERATE PATHNAMES
|
9
|
+
|
10
|
+
# From URI with path and query
|
11
|
+
puts up.uri_to_pathname("http://www.fake.fak/path1/path2?query")
|
12
|
+
# => www.fake.fak__|path1_|_path2?query(http)
|
13
|
+
|
14
|
+
# From URI without path and query
|
15
|
+
puts up.uri_to_pathname("http://www.fake.fak")
|
16
|
+
# => www.fake.fak__|_NOPATH_(http)
|
17
|
+
|
18
|
+
# Fragments or other URI parts are silently ignored
|
19
|
+
puts up.uri_to_pathname('http://donaldfagen.com/disc_nightfly.php#rubybaby')
|
20
|
+
# => donaldfagen.com__|disc_nightfly.php(http)
|
21
|
+
|
22
|
+
# If a directory is given as a second arg, pathname will be prepended with that
|
23
|
+
# expanded directory
|
24
|
+
puts up.uri_to_pathname("http://www.fake.fak", "~/my_webdumps")
|
25
|
+
# => /home/marcel/my_webdumps/www.fake.fak__|_NOPATH_(http)
|
26
|
+
|
27
|
+
# Also a third argument can be given and will be appended as an extension
|
28
|
+
puts up.uri_to_pathname("http://www.fake.fak/path","", ".html")
|
29
|
+
# => www.fake.fak__|path(http).html
|
30
|
+
|
31
|
+
# RECOVERING URIs FROM PATHNAMES
|
32
|
+
|
33
|
+
# pathnames can be parsed if correctly generated as above examples (see docs)
|
34
|
+
p up.parse('/home/marcel/my_webdumps/www.fake.fak__|path1_|_path2?query(http).html.gz')
|
35
|
+
|
36
|
+
# URI can be also retrieved from correct (Uri)pathnames
|
37
|
+
puts up.pathname_to_uri('/home/marcel/my_webdumps/www.fake.fak__|_NOPATH_(http).html.gz')
|
38
|
+
# => http://www.fake.fak
|
39
|
+
puts up.pathname_to_uri('www.fake.fak__|path?query(http).html.gz')
|
40
|
+
# => http://www.fake.fak/path?query
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# fakeweb
|
2
|
+
# MME 27/8/2011
|
3
|
+
#
|
4
|
+
# This example demonstrates how uri_pathname should be used to save and restore
|
5
|
+
# pages, including a demo with fakeweb (a web stub utility) to simulate web
|
6
|
+
# accesses.
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'open-uri'
|
11
|
+
require 'uri_pathname'
|
12
|
+
require 'fakeweb' # gem install fakeweb
|
13
|
+
|
14
|
+
# put here whatever temporary directory name to use
|
15
|
+
MY_DIR=File.expand_path '~/my_dumps'
|
16
|
+
# put here whatever URIs u want to access
|
17
|
+
MY_URIS = [
|
18
|
+
'http://en.wikipedia.org/wiki/Ruby_Bridges',
|
19
|
+
'http://donaldfagen.com/disc_nightfly.php',
|
20
|
+
'http://www.rubi.cat/ajrubi/portada/index.php',
|
21
|
+
'http://www.google.com/cse?q=array&cx=013598269713424429640%3Ag5orptiw95w&ie=UTF-8&sa=Search'
|
22
|
+
]
|
23
|
+
|
24
|
+
# some convenient defs
|
25
|
+
# Makes sure the dump directory MY_DIR exists before anything is saved to it.
#
# Missing parent directories are created as well. Nothing is done when MY_DIR
# is already a directory, so the method can be called repeatedly.
#
# FileUtils.mkdir_p is used because File.makedirs only exists when the old
# Ruby 1.8 'ftools' library is loaded, and this script requires 'fileutils'.
def prepare_example
  FileUtils.mkdir_p(MY_DIR) unless File.directory?(MY_DIR)
end
|
28
|
+
|
29
|
+
# preparation (comment this if you've already got your test dir)
|
30
|
+
prepare_example
|
31
|
+
|
32
|
+
up = UriPathname.new
|
33
|
+
|
34
|
+
# 1st round: Capture MY_URIS, and save them with the appropriate UriPathname
|
35
|
+
puts "1- Capturing URIs"
|
36
|
+
data = nil
|
37
|
+
sizes = []
|
38
|
+
MY_URIS.each do |uri|
|
39
|
+
open uri do |u|
|
40
|
+
data=u.read
|
41
|
+
pathname = up.uri_to_pathname(uri,MY_DIR,".html")
|
42
|
+
File.open(pathname,'w') do |f|
|
43
|
+
f.write data
|
44
|
+
sizes << data.size
|
45
|
+
puts "SAVED #{uri} : #{data.size} bytes"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# 2nd round: checking saved files and preparing
|
51
|
+
puts "\n2- CHECKING CAPTURED FILES AND PREPARING FAKE WEB ACCESSES"
|
52
|
+
FakeWeb.allow_net_connect=false
|
53
|
+
Dir[File.join(MY_DIR,"*")].each do |name|
|
54
|
+
uri = up.pathname_to_uri name
|
55
|
+
FakeWeb.register_uri :any, uri, :body=>name, :content_type=>"text/html"
|
56
|
+
puts "#{name}\n\tcorresponds to #{uri}"
|
57
|
+
end
|
58
|
+
|
59
|
+
# 3rd round: Access Web without actually accessing web
|
60
|
+
puts "\n3- FAKE WEB ACCESSES"
|
61
|
+
MY_URIS.each_with_index do |uri,i|
|
62
|
+
open uri do |u|
|
63
|
+
data=u.read
|
64
|
+
puts "FAKE #{uri} ACCESS #{(data.size == sizes[i]) ? 'OK' : 'KO'}: #{data.size} bytes"
|
65
|
+
end
|
66
|
+
end
|
data/lib/uri_pathname.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
# uri_pathname
|
2
|
+
# MME 25/8/2011
|
3
|
+
|
4
|
+
require 'uri'
|
5
|
+
require 'uri_pathname/version'
|
6
|
+
|
7
|
+
# Converts between URIs and unique, readable pathnames, and back again.
#
# A generated pathname has the form
#   [DIR/]hostname HOST_SEP path [?query] (scheme) [.extension]
# where every '/' inside the path and the query is replaced by the configured
# path separator, so the whole URI fits into a single file name.
class UriPathname

  # Error classes. They are declared for callers, but the conversion methods
  # below signal a failed conversion by returning nil instead of raising them.
  class InvalidPathName < StandardError; end
  class InvalidURI < StandardError; end

  PTH_SEP = '_|_'       # used to swap chars from and to '/' while converting to
                        # pathname and vice versa. To work it should be a String
                        # that doesn't exist in URIs and that can be used in
                        # pathnames.

  HOST_SEP = '__|'      # used to separate the hostname from the path inside
                        # the generated file name

  NO_PTH = '_NOPATH_'   # used to make a filename for root URIs (empty path).
                        # To work it should be a String that doesn't exist in URIs

  BASE_DIR = ''         # dir prepended to all pathnames

  HOST_2_DIR = true     # URI hostname will become a subdirectory?

  # default attributes
  DEFAULT_ATTRS = {
    :pth_sep    => PTH_SEP,
    :host_sep   => HOST_SEP,
    :no_pth     => NO_PTH,
    :base_dir   => BASE_DIR,
    :host_2_dir => HOST_2_DIR
  }

  attr_accessor(*DEFAULT_ATTRS.keys)

  # Initializes an UriPathname that can be used to convert between URIs and
  # pathnames. +options+ is a hash that can contain any of:
  # :pth_sep => String that will be used to substitute '/' inside URI's path and queries (default UriPathname::PTH_SEP)
  # :host_sep => String that will be used to substitute '/' between host and path (default UriPathname::HOST_SEP)
  # :no_pth => String that will be used as a path placeholder when no URI's path exists (default UriPathname::NO_PTH)
  # :base_dir => String containing base directory prepended to any pathname (default UriPathname::BASE_DIR)
  # :host_2_dir => If true, hostnames will become subdirectories (default UriPathname::HOST_2_DIR)
  #
  # Keys other than the ones listed above are ignored. When +options+ is not a
  # Hash, all defaults are used.
  def initialize(options = {})
    options = {} unless options.is_a?(Hash)

    DEFAULT_ATTRS.each do |key, default|
      value = options.key?(key) ? options[key] : default
      # String-valued attributes are stored as a private String copy, so that
      # non-String values keep working and an instance never shares (and can
      # never mutate) the default constants. Other attributes, such as the
      # boolean :host_2_dir, keep their real type.
      value = value.to_s.dup if default.is_a?(String)
      instance_variable_set("@#{key}", value)
    end
  end

  # Converts an uri to a valid pathname, that is:
  #   [_DIR_/]_hostname_HS_path_?_query_(_scheme_)[._extension_]
  # where
  #   DIR is +basedirpath+ if given else _@base_dir_
  #   HS is the configured host separator (UriPathname::HOST_SEP by default)
  #   all '/' within _path_ and _query_ will be substituted by the configured
  #   path separator (UriPathname::PTH_SEP by default).
  #
  # +extension+ is appended as given; a leading '.' is added when missing.
  # Fragments and other URI parts are ignored. The arguments are never
  # modified.
  #
  # Returns the pathname String (expanded against DIR when DIR is not empty),
  # or nil when +uri+ lacks a scheme or a host. URI.split may raise
  # URI::InvalidURIError for a malformed +uri+.
  def uri_to_pathname(uri, basedirpath = nil, extension = "")
    parts = URI.split(uri)
    scheme = parts[0]
    host = parts[2]
    # raise(InvalidURI, "#{uri} lacks scheme and/or host") unless (scheme && host)
    return nil unless scheme && host

    path = parts[5].to_s
    path = if path.size <= 1 # "" or "/"
             @no_pth
           else
             path[1..-1].gsub("/", @pth_sep) # first '/' isn't necessary
           end

    query = parts[7] ? "?#{parts[7].gsub('/', @pth_sep)}" : ""

    # work on a local value so the caller's extension String stays untouched
    ext = extension.to_s
    ext = ".#{ext}" unless ext.empty? || ext.start_with?('.')

    pathname = "#{host}#{@host_sep}#{path}#{query}(#{scheme})#{ext}"
    basedirpath ||= @base_dir
    basedirpath.size > 0 ? File.expand_path(pathname, basedirpath) : pathname
  end

  # converts +pathname+ to an URI. Appearances of the path separator will be
  # substituted by "/" (the standard path separator).
  # returns the URI String if +pathname+ was convertible or nil in other case.
  def pathname_to_uri(pathname)
    parsed = parse(pathname)
    parsed ? parsed[7] : nil
  end

  # splits up a +pathname+ and returns an array where:
  # 1. the 3 first elements correspond to Pathname
  #    - arr[0] = dirname
  #    - arr[1] = basename (without extension)
  #    - arr[2] = extension
  # 2. the last elements correspond to URI
  #    - arr[3] = scheme
  #    - arr[4] = hostname
  #    - arr[5] = path
  #    - arr[6] = query
  #    - arr[7] = URI (complete)
  # returns nil if +pathname+ doesn't correspond to UriPathname format (see
  # UriPathname#uri_to_pathname)
  def parse(pathname)
    # raise(InvalidPathName, "pathname should be a String") \
    #   unless pathname.respond_to? :to_s
    return nil unless pathname.respond_to?(:to_s)
    complete_pathname = File.expand_path(pathname.to_s)

    # pathname parsed from complete_pathname: everything after the closing
    # ')' of the scheme is the extension
    extension = complete_pathname.slice!(/(\.[^\)\(\/]*)?$/)
    dirname, basename = File.split(complete_pathname)

    # hostname
    if @host_sep == '/'
      hostname = File.basename(dirname)
      rest_of_thing = basename
    else
      # limit 2: only the first separator divides host from the rest, any
      # further occurrence belongs to the path/query part
      hostname, rest_of_thing = basename.split(@host_sep, 2)
    end
    return nil if hostname.nil? || hostname.empty?

    # path, query and scheme
    match = /(.*)\(([^\(]+)\)$/.match(rest_of_thing.to_s)
    return nil unless match
    path_query = match[1]
    scheme = match[2]

    if path_query == @no_pth
      path = ''
      query = ''
    elsif path_query.start_with?("#{@no_pth}?")
      # root URI that carried a query: the placeholder stands for the empty path
      path = ''
      query = path_query[(@no_pth.size + 1)..-1].gsub(@pth_sep, "/")
    else
      # limit 2: a '?' inside the query is legal and must be kept
      path, query = path_query.gsub(@pth_sep, "/").split("?", 2)
      query = nil if query && query.empty?
    end

    uri = "#{scheme}://#{hostname}"
    uri += "/#{path}" unless path.nil? || path.empty?
    uri += "?#{query}" unless query.nil? || query.empty?

    [dirname, basename, extension, scheme, hostname, path, query, uri]
  end

end
|
141
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'uri_pathname'
|
3
|
+
|
4
|
+
# Checks the URI -> pathname direction of UriPathname with default settings.
class TC_Uri2Pathname < Test::Unit::TestCase

  def setup
    @up = UriPathname.new
  end

  # A missing path and a bare "/" both map to the no-path placeholder.
  def test_uri_without_path
    expected = 'www.fake.fak__|_NOPATH_(http)'
    ['http://www.fake.fak', 'http://www.fake.fak/'].each do |uri|
      assert_equal(expected, @up.uri_to_pathname(uri))
    end
  end

  # Slashes in the path (and in the query) become the path separator.
  def test_uri_with_path
    {
      'http://www.fake.fak/path1/path2' => 'www.fake.fak__|path1_|_path2(http)',
      'http://foo-bar.com/yay?/hi'      => 'foo-bar.com__|yay?_|_hi(http)'
    }.each do |uri, expected|
      assert_equal(expected, @up.uri_to_pathname(uri))
    end
  end

  # The fragment is dropped from the generated pathname.
  def test_uri_with_path_and_fragment
    pathname = @up.uri_to_pathname('http://donaldfagen.com/disc_nightfly.php#rubybaby')
    assert_equal('donaldfagen.com__|disc_nightfly.php(http)', pathname)
  end

  # The query is kept after a '?' in the generated pathname.
  def test_uri_with_path_and_query
    {
      'http://www.fake.fak/path1/path2?query' => 'www.fake.fak__|path1_|_path2?query(http)',
      'http://foo-bar.com/yay/?foo=bar&a=22'  => 'foo-bar.com__|yay_|_?foo=bar&a=22(http)'
    }.each do |uri, expected|
      assert_equal(expected, @up.uri_to_pathname(uri))
    end
  end

  # The base directory is prepended; the extension works with or without dot.
  def test_uri_dir_expansion_and_exension
    expected = '/tmp/my_webdumps/www.fake.fak__|path1_|_path2?query(http).html'
    [".html", "html"].each do |extension|
      pathname = @up.uri_to_pathname("http://www.fake.fak/path1/path2?query", "/tmp/my_webdumps", extension)
      assert_equal(expected, pathname)
    end
  end

end
|
35
|
+
|
36
|
+
# Checks the pathname -> URI direction of UriPathname with default settings.
class TC_Pathname2Uri < Test::Unit::TestCase

  def setup
    @up = UriPathname.new
  end

  # An absolute pathname is converted; directory and extensions are ignored.
  def test_pn_absolute
    assert_equal('http://www.fake.fak', @up.pathname_to_uri('/home/marcel/my_webdumps/www.fake.fak__|_NOPATH_(http).html.gz'))
  end

  # A relative pathname is converted the same way.
  def test_pn_relative
    assert_equal('http://www.fake.fak', @up.pathname_to_uri('my_webdumps/www.fake.fak__|_NOPATH_(http).html.gz'))
  end

  # Pathnames that do not follow the UriPathname format yield nil.
  # assert_nil is the dedicated assertion for an expected nil result.
  def test_pn_malformed
    assert_nil(@up.pathname_to_uri('my_webdumps/www.fake.fak__|_NOPATH_(http)html.gz'), "No dot after last )")
    assert_nil(@up.pathname_to_uri('my_webdumps/www.fake.fak__|_NOPATH_(http.html.gz'), "No clossing )")
    assert_nil(@up.pathname_to_uri('my_webdumps/www.fake.fak__|_NOPATH_http).html.gz'), "No opening (")
    assert_nil(@up.pathname_to_uri('my_webdumps/__|_NOPATH_(http).html.gz'), "No host")
    assert_nil(@up.pathname_to_uri('my_webdumps/www.fake.fak__|_NOPATH_().html.gz'), "No scheme")
  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "uri_pathname"
|
8
|
+
s.version = "0.0.0.pre2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Marcel Massana"]
|
12
|
+
s.date = "2011-08-30"
|
13
|
+
s.description = "Simple converter between URIs and Pathnames. It creates valid, unique and readable filenames from URIs and viceversa. It can be used to name files while saving data from websites and conversely, read files assigned to URIs while, for instance, simulating or stubbing web accesses by means of reading files"
|
14
|
+
s.email = "xaxaupua@gmail.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"examples/simple_examples.rb",
|
26
|
+
"examples/using_fakeweb.rb",
|
27
|
+
"lib/uri_pathname.rb",
|
28
|
+
"lib/uri_pathname/version.rb",
|
29
|
+
"test/test_uri_pathname.rb",
|
30
|
+
"uri_pathname.gemspec"
|
31
|
+
]
|
32
|
+
s.homepage = "http://github.com/syborg/uri_pathname"
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = "1.8.10"
|
36
|
+
s.summary = "Simple converter between URIs and Pathnames"
|
37
|
+
s.test_files = [
|
38
|
+
"test/test_uri_pathname.rb",
|
39
|
+
"examples/simple_examples.rb",
|
40
|
+
"examples/using_fakeweb.rb"
|
41
|
+
]
|
42
|
+
|
43
|
+
if s.respond_to? :specification_version then
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
47
|
+
else
|
48
|
+
end
|
49
|
+
else
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uri_pathname
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: -223651667
|
5
|
+
prerelease: 6
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
- pre
|
11
|
+
- 2
|
12
|
+
version: 0.0.0.pre2
|
13
|
+
platform: ruby
|
14
|
+
authors:
|
15
|
+
- Marcel Massana
|
16
|
+
autorequire:
|
17
|
+
bindir: bin
|
18
|
+
cert_chain: []
|
19
|
+
|
20
|
+
date: 2011-08-30 00:00:00 Z
|
21
|
+
dependencies: []
|
22
|
+
|
23
|
+
description: Simple converter between URIs and Pathnames. It creates valid, unique and readable filenames from URIs and viceversa. It can be used to name files while saving data from websites and conversely, read files assigned to URIs while, for instance, simulating or stubbing web accesses by means of reading files
|
24
|
+
email: xaxaupua@gmail.com
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files:
|
30
|
+
- LICENSE
|
31
|
+
- README.rdoc
|
32
|
+
files:
|
33
|
+
- .document
|
34
|
+
- .gitignore
|
35
|
+
- LICENSE
|
36
|
+
- README.rdoc
|
37
|
+
- Rakefile
|
38
|
+
- examples/simple_examples.rb
|
39
|
+
- examples/using_fakeweb.rb
|
40
|
+
- lib/uri_pathname.rb
|
41
|
+
- lib/uri_pathname/version.rb
|
42
|
+
- test/test_uri_pathname.rb
|
43
|
+
- uri_pathname.gemspec
|
44
|
+
homepage: http://github.com/syborg/uri_pathname
|
45
|
+
licenses: []
|
46
|
+
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options:
|
49
|
+
- --charset=UTF-8
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 25
|
67
|
+
segments:
|
68
|
+
- 1
|
69
|
+
- 3
|
70
|
+
- 1
|
71
|
+
version: 1.3.1
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 1.8.10
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Simple converter between URIs and Pathnames
|
79
|
+
test_files:
|
80
|
+
- test/test_uri_pathname.rb
|
81
|
+
- examples/simple_examples.rb
|
82
|
+
- examples/using_fakeweb.rb
|