indexer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.index +54 -0
  2. data/HISTORY.md +9 -0
  3. data/README.md +145 -0
  4. data/bin/index +7 -0
  5. data/data/indexer/r2013/index.kwalify +175 -0
  6. data/data/indexer/r2013/index.yes +172 -0
  7. data/data/indexer/r2013/index.yesi +67 -0
  8. data/data/indexer/r2013/ruby.txt +35 -0
  9. data/data/indexer/r2013/yaml.txt +30 -0
  10. data/lib/indexer.rb +65 -0
  11. data/lib/indexer/attributes.rb +171 -0
  12. data/lib/indexer/command.rb +260 -0
  13. data/lib/indexer/components.rb +8 -0
  14. data/lib/indexer/components/author.rb +140 -0
  15. data/lib/indexer/components/conflict.rb +78 -0
  16. data/lib/indexer/components/copyright.rb +95 -0
  17. data/lib/indexer/components/dependency.rb +18 -0
  18. data/lib/indexer/components/organization.rb +133 -0
  19. data/lib/indexer/components/repository.rb +140 -0
  20. data/lib/indexer/components/requirement.rb +360 -0
  21. data/lib/indexer/components/resource.rb +209 -0
  22. data/lib/indexer/conversion.rb +14 -0
  23. data/lib/indexer/conversion/gemfile.rb +44 -0
  24. data/lib/indexer/conversion/gemspec.rb +114 -0
  25. data/lib/indexer/conversion/gemspec_exporter.rb +304 -0
  26. data/lib/indexer/core_ext.rb +4 -0
  27. data/lib/indexer/error.rb +23 -0
  28. data/lib/indexer/gemfile.rb +75 -0
  29. data/lib/indexer/importer.rb +144 -0
  30. data/lib/indexer/importer/file.rb +94 -0
  31. data/lib/indexer/importer/gemfile.rb +27 -0
  32. data/lib/indexer/importer/gemspec.rb +43 -0
  33. data/lib/indexer/importer/html.rb +289 -0
  34. data/lib/indexer/importer/markdown.rb +45 -0
  35. data/lib/indexer/importer/ruby.rb +47 -0
  36. data/lib/indexer/importer/version.rb +38 -0
  37. data/lib/indexer/importer/yaml.rb +46 -0
  38. data/lib/indexer/loadable.rb +159 -0
  39. data/lib/indexer/metadata.rb +879 -0
  40. data/lib/indexer/model.rb +237 -0
  41. data/lib/indexer/revision.rb +43 -0
  42. data/lib/indexer/valid.rb +287 -0
  43. data/lib/indexer/validator.rb +313 -0
  44. data/lib/indexer/version/constraint.rb +124 -0
  45. data/lib/indexer/version/exceptions.rb +11 -0
  46. data/lib/indexer/version/number.rb +497 -0
  47. metadata +141 -0
@@ -0,0 +1,4 @@
1
+ require 'indexer/core_ext/hash/to_h'
2
+ require 'indexer/core_ext/hash/rekey'
3
+ require 'indexer/core_ext/kernel/cli'
4
+
@@ -0,0 +1,23 @@
1
module Indexer

  # Tag module for Indexer exceptions.
  #
  # Extending an arbitrary error with this module lets callers recognize
  # it as an Indexer error (e.g. `rescue Indexer::Error`) while the error
  # keeps its own class.
  module Error
    # Build (or reuse) an exception and tag it with this module.
    #
    # msg  - optional message for the resulting exception.
    # orig - an exception class, an exception instance, or nil; defaults
    #        to the exception currently being handled (`$!`).
    #
    # Returns the exception object, extended with Indexer::Error.
    def self.exception(msg=nil, orig=$!)
      tagged =
        case orig
        when Class then orig.new(msg)
        when nil   then StandardError.new(msg)
        else msg ? orig.exception(msg) : orig
        end
      tagged.extend(self)
      tagged
    end
  end

end
23
+
@@ -0,0 +1,75 @@
1
+ #module Indexer
2
+
3
+ # Make sure Indexer is loaded.
4
+ require 'indexer' unless defined?(Indexer)
5
+
6
+ require 'bundler'
7
+
8
# Bundler integration.
#
# This does not support Bundler's `:git` references
# or `:require` option (at least not yet).
#
module Bundler

  # Reopens Bundler::Dsl so a Gemfile can draw on Indexer metadata.
  #
  class Dsl

    #
    # Dynamically update a Gemfile from the project's index metadata
    # (as returned by `Indexer::Metadata.open`). Just call `index`
    # from your Gemfile:
    #
    #     index
    #
    # This is analogous to the Gemfile's `gemspec` method. Requirements
    # flagged as external are skipped; every other requirement is
    # declared via `gem` with its version and groups.
    #
    def index
      Indexer::Metadata.open.requirements.each do |requirement|
        next if requirement.external?
        gem(requirement.name, requirement.version, :group => requirement.groups)
      end
    end

    #
    # Metadata instance that receives values set through #method_missing.
    #
    def metadata
      @metadata ||= Indexer::Metadata.new
    end

    #
    # Keep the previous handler reachable so unmatched calls can be
    # passed on to it.
    #
    alias :_method_missing :method_missing

    #
    # Evaluating on the Dsl instance allows basic metadata to be set
    # from Ruby via this method:
    #
    # * no arguments  - reads the attribute, if metadata responds to it;
    # * one argument  - assigns it through the matching `name=` writer;
    # * several       - assigns the argument list as an array.
    #
    # Anything metadata cannot handle goes to the previous handler.
    #
    def method_missing(s, *a, &b)
      writer = "#{s.to_s.chomp('=')}="

      if a.empty?
        return metadata.__send__(s, &b) if metadata.respond_to?(s)
      elsif metadata.respond_to?(writer)
        return metadata.__send__(writer, a.size == 1 ? a.first : a)
      end

      _method_missing(s, *a, &b)
    end

  end

end
71
+
72
+ #end
73
+
74
+ #::Bundler::Dsl.__send__(:include, Indexer::Bundler::Dsl)
75
+
@@ -0,0 +1,144 @@
1
module Indexer

  # Import external sources into metadata.
  #
  # Shorthand for `Importer.import`.
  #
  def self.import(*sources)
    Importer.import(*sources)
  end

  # Importer takes disparate data sources and imports them
  # into a Metadata instance.
  #
  # Mixins are used to inject import behavior by overriding the `#import`
  # method. Any such mixin's `#import` must call `super` if its own handling
  # doesn't apply, allowing the routine to fall back to the other possible
  # import methods.
  #
  class Importer

    #
    # Require all import mixins.
    #
    def self.require_importers
      require_relative 'importer/file'
      require_relative 'importer/ruby'
      require_relative 'importer/yaml'
      require_relative 'importer/html'
      require_relative 'importer/markdown'
      #require_relative 'importer/rdoc'
      #require_relative 'importer/textile'
      require_relative 'importer/gemspec'
      require_relative 'importer/gemfile'
      require_relative 'importer/version'
    end

    #
    # Import metadata from external sources.
    #
    # source - paths of the sources to import. A trailing Hash is accepted
    #          and removed from the list, but is not otherwise used here.
    #
    # When no source is given, the `source` entry of the first file matching
    # LOCK_FILE is used if there is one, otherwise USER_FILE.
    #
    # Returns the metadata built by the importer.
    # Raises RuntimeError (from #import) when a source is not handled by
    # any import mixin.
    #
    def self.import(*source)
      source.pop if Hash === source.last

      require_importers

      ## use source of current metadata if none given
      ## TODO: Only search the current directory or search up to root?
      if source.empty?
        if file = Dir[LOCK_FILE].first #or `Metadata.exists?` ?
          data = YAML.load_file(file)
          # An empty file loads as false/nil and a scalar document as a
          # non-Hash; only a mapping can carry a `source` entry.
          source = Array(data['source']) if Hash === data
        end
      end

      source = [USER_FILE] if source.empty?

      # Missing files only produce a warning here; #import decides below
      # whether the source can be handled at all.
      source.each do |file|
        warn "metadata source file not found - `#{file}'" unless File.exist?(file)
      end

      importer = Importer.new
      source.each { |src| importer.import(src) }
      importer.metadata
    end

    #
    # Initialize importer.
    #
    # metadata - existing metadata object to import into; a new Metadata
    #            is created when omitted.
    #
    def initialize(metadata=nil)
      @metadata   = metadata || Metadata.new
      @file_cache = {}
    end

    #
    # Metadata being built.
    #
    attr :metadata

    #
    # Import a single source by handing it to the import mixins (via
    # `super`). On success the source is recorded in `metadata.sources`
    # (once).
    #
    # Raises RuntimeError if no mixin reports success.
    #
    def import(source)
      success = super(source) if defined?(super)
      unless success
        raise "metadata source not found or not a known type -- #{source}"
      end
      metadata.sources << source unless metadata.sources.include?(source)
    end

    #
    # Provides a file contents cache. This is used by the YAMLImportation
    # script, for instance, to see if the file begins with `---`, in
    # which case the file is taken to be YAML format, even if the
    # file's extension is not `.yml` or `.yaml`.
    #
    def read(file)
      @file_cache[file] ||= File.read(file)
    end

    #
    # Evaluating on the Importer instance allows basic metadata to be
    # built from Ruby via this method:
    #
    # * no arguments  - reads the attribute, if metadata responds to it;
    # * one argument  - assigns it through the matching `name=` writer;
    # * several       - assigns the argument list as an array.
    #
    # A missing `import` is answered with nil rather than an error.
    #
    def method_missing(s, *a, &b)
      return if s == :import

      r = s.to_s.chomp('=')
      case a.size
      when 0
        if metadata.respond_to?(s)
          return metadata.__send__(s, &b)
        end
      when 1
        if metadata.respond_to?("#{r}=")
          return metadata.__send__("#{r}=", *a)
        end
      else
        if metadata.respond_to?("#{r}=")
          return metadata.__send__("#{r}=", a)
        end
      end

      super(s, *a, &b)  # if cases don't match-up
    end

    #
    # Is `text` a YAML document? It determines this simply
    # by checking for `---` (or a `%TAG`/`%YAML` directive) at the
    # top of the text.
    #
    # Returns the match position (0) when it is, nil otherwise.
    #
    # @todo Ignore top comments.
    #
    def yaml?(text)
      text =~ /\A(---|%TAG|%YAML)/
    end
  end

end
@@ -0,0 +1,94 @@
1
module Indexer

  class Importer

    # Import metadata from individual files.
    #
    module FileImportation

      #
      # Files import procedure.
      #
      # Handles `source` when it is a directory and returns true;
      # otherwise defers to the next import mixin, if any.
      #
      def import(source)
        if File.directory?(source)
          load_directory(source)
          true
        else
          super(source) if defined?(super)
        end
      end

      #
      # Import files from a given directory. This will only import files
      # that have a name corresponding to a metadata attribute, unless
      # the file is listed in a `.index_extra` file within the directory
      # (one glob pattern per line, relative to the directory).
      #
      # However, files with an extension of `.yml` or `.yaml` will be loaded
      # whole cloth (via `load_yaml`) and not as a single attribute.
      #
      # @todo Subdirectories are simply omitted. Maybe do otherwise in future?
      #
      def load_directory(folder)
        return unless File.directory?(folder)

        extra = extra_field_files(folder)

        Dir[File.join(folder, '*')].each do |file|
          next if File.directory?(file)
          name = File.basename(file).downcase
          next load_yaml(file) if %w{.yaml .yml}.include?(File.extname(file))
          # Extras are expanded paths, so compare by path, not by bare name.
          next load_field_file(file) if extra.include?(File.expand_path(file))
          next load_field_file(file) if metadata.attributes.include?(name.to_sym)
        end
      end

      #
      # Import a field setting from a file. The field name is the file's
      # downcased base name, minus a `.yaml`/`.yml`/`.text`/`.txt` extension.
      #
      # * `.yaml`/`.yml` files are parsed as YAML;
      # * `.text`/`.txt` files are stored as stripped text;
      # * any other file is parsed as YAML if it starts with `---`,
      #   otherwise stored as stripped text.
      #
      # Directories are ignored.
      #
      # TODO: Ultimately support JSON and maybe other types, and possibly
      #       use mime-types library to recognize them.
      #
      def load_field_file(file)
        return if File.directory?(file)

        name = File.basename(file).downcase

        case File.extname(file).downcase
        when '.yaml', '.yml'
          metadata[name.chomp('.yaml').chomp('.yml')] = YAML.load_file(file)
          # TODO: should yaml files with explicit extension be merged instead?
          #metadata.merge!(YAML.load_file(file))
        when '.text', '.txt'
          metadata[name.chomp('.text').chomp('.txt')] = File.read(file).strip
        else
          text = File.read(file)
          metadata[name] = (/\A---/ =~ text) ? YAML.load(text) : text.strip
        end
      end

      private

      #
      # Expand the glob patterns listed in the folder's `.index_extra`
      # file into a list of absolute file paths. Blank lines are skipped.
      # Returns an empty list when there is no such file.
      #
      def extra_field_files(folder)
        extra_file = File.join(folder, '.index_extra')
        return [] unless File.exist?(extra_file)

        patterns = File.read(extra_file).split("\n").map { |line| line.strip }
        patterns = patterns.reject { |pattern| pattern.empty? }
        patterns.flat_map do |pattern|
          Dir[File.join(folder, pattern)].map { |path| File.expand_path(path) }
        end
      end

    end

    # Include FileImportation mixin into Importer class.
    include FileImportation

  end

end
@@ -0,0 +1,27 @@
1
module Indexer

  class Importer

    # Import mixin for Bundler's Gemfile.
    #
    module GemfileImportation
      #
      # If the source is named exactly `Gemfile`, hand it to
      # `metadata.import_gemfile` and report success (true).
      # Any other source is passed on to the next import mixin, if any.
      #
      def import(source)
        unless 'Gemfile' == source
          return defined?(super) ? super(source) : nil
        end

        metadata.import_gemfile(source)
        true
      end
    end

    # Include GemfileImportation mixin into Importer class.
    include GemfileImportation

  end

end
@@ -0,0 +1,43 @@
1
module Indexer

  class Importer

    # It is not recommended that a .gemspec be the usual source of metadata.
    # Rather, it is recommended that the gemspec be produced from the metadata
    # instead. (The metadata covers almost every aspect of a gemspec, and
    # a gemspec can be augmented where needed.) Nonetheless, a gemspec can serve
    # as a good source for creating an initial metadata file.
    #
    module GemspecImportation

      #
      # If the source has a `.gemspec` extension, load it with
      # `Gem::Specification.load`, pass the result to
      # `metadata.import_gemspec` and report success (true).
      # Any other source is passed on to the next import mixin, if any.
      #
      def import(source)
        unless File.extname(source) == '.gemspec'
          return defined?(super) ? super(source) : nil
        end

        # TODO: handle YAML-based gemspecs
        metadata.import_gemspec(::Gem::Specification.load(source))
        true
      end

      #
      #def local_files(root, glob, *flags)
      #  bits  = flags.map{ |f| File.const_get("FNM_#{f.to_s.upcase}") }
      #  files = Dir.glob(File.join(root,glob), bits)
      #  files = files.map{ |f| f.sub(root,'') }
      #  files
      #end
    end

    # Include GemspecImportation mixin into Importer class.
    include GemspecImportation

  end

end
43
+
@@ -0,0 +1,289 @@
1
module Indexer

  class Importer

    # Import metadata from an HTML source using microformat-style
    # CSS class names (`.iname`, `.iauthor`, `.irepo`, ...).
    #
    # NOTE: The implementation using CSS selectors is fairly slow.
    #       If it ever becomes important to speed this up, we might
    #       try traversing the document instead.
    #
    module HTMLImportation

      #
      # HTML import procedure.
      #
      # Handles `source` when it is an existing file with an `.html`
      # extension and returns true; otherwise defers to the next
      # import mixin, if any.
      #
      def import(source)
        if File.file?(source) && File.extname(source) == '.html'
          load_html(source)
          return true
        end
        super(source) if defined?(super)
      end

      #
      # Import metadata from HTML file.
      #
      # file - a parsed Nokogiri document, an open File, or a file path.
      #
      # The collected values are merged into `metadata`.
      #
      def load_html(file)
        require 'nokogiri'

        doc =
          case file
          when Nokogiri::XML::Document
            file
          when File
            Nokogiri::HTML(file)
          else
            # Block form closes the file handle once parsing is done.
            File.open(file) { |io| Nokogiri::HTML(io) }
          end

        data = {}

        %w{version summary description created}.each do |field|
          load_html_simple(field, doc, data)
        end

        load_html_name(doc, data)
        load_html_title(doc, data)
        load_html_authors(doc, data)
        load_html_organizations(doc, data)
        load_html_requirements(doc, data)
        load_html_resources(doc, data)
        load_html_repositories(doc, data)
        load_html_copyrights(doc, data)
        load_html_categories(doc, data)

        metadata.merge!(data)
      end

      #
      # Load a simple field value: the stripped text of the first
      # element with class `i<field>` is stored under `field`.
      #
      def load_html_simple(field, doc, data)
        nodes = doc.css(".i#{field}")
        return if nodes.nil? || nodes.empty?
        data[field] = nodes.first.content.strip
      end

      #
      # Load name (class `iname`), and use it, capitalized, for the
      # title too if metadata has no title yet.
      #
      def load_html_name(doc, data)
        nodes = doc.css(".iname")
        return if nodes.nil? || nodes.empty?
        text = nodes.first.content.strip

        data['title'] = text.capitalize unless metadata.title

        data['name'] = text
      end

      #
      # Load title (class `ititle`), and use it, downcased with
      # whitespace replaced by underscores, for the name too if
      # metadata has no name yet.
      #
      def load_html_title(doc, data)
        nodes = doc.css(".ititle")
        return if nodes.nil? || nodes.empty?
        text = nodes.first.content.strip

        data['name'] = text.downcase.gsub(/\s+/, '_') unless metadata.name

        data['title'] = text
      end

      #
      # Class is `icategory`. Each non-blank element text becomes
      # a category.
      #
      def load_html_categories(doc, data)
        nodes = doc.css('.icategory')
        return if nodes.nil? || nodes.empty?

        data['categories'] ||= []

        nodes.each do |node|
          entry = node.content.strip
          data['categories'] << entry unless entry == ""
        end
      end

      #
      # Class is `iresource`. Elements without an `href` are skipped.
      #
      def load_html_resources(doc, data)
        nodes = doc.css('.iresource')
        return if nodes.nil? || nodes.empty?

        data['resources'] ||= []

        nodes.each do |node|
          entry = {}

          entry['uri']   = node.attr('href')
          entry['type']  = node.attr('name') || node.attr('title')  # best choice for this?
          entry['label'] = node.content.strip

          data['resources'] << entry if entry['uri']
        end
      end

      #
      # Class is `irequirement`. Name, version and groups are read from
      # nested `.name`, `.version` and `.groups`/`.group` elements.
      # Entries without a name are skipped.
      #
      def load_html_requirements(doc, data)
        nodes = doc.css('.irequirement')
        return if nodes.nil? || nodes.empty?

        data['requirements'] ||= []

        nodes.each do |node|
          entry = {}

          if n = node.at_css('.name')
            entry['name'] = n.content.strip
          end

          if n = node.at_css('.version')
            entry['version'] = n.content.strip
          end

          if n = (node.at_css('.groups') || node.at_css('.group'))
            # Groups may be wrapped in parentheses, e.g. "(test build)".
            text = n.content.strip
            text = text.sub(/^[(]/, '').sub(/[)]$/, '').strip
            entry['groups'] = text.split(/\s+/)

            if %w{test build document development}.any?{ |g| entry['groups'].include?(g) }
              entry['development'] = true
            end
          end

          data['requirements'] << entry if entry['name']
        end
      end

      #
      # Class is `iauthor`. Entries without a name are skipped.
      #
      def load_html_authors(doc, data)
        nodes = doc.css('.iauthor')
        return if nodes.nil? || nodes.empty?

        data['authors'] ||= []

        nodes.each do |node|
          entry = {}

          if n = (node.at_css('.name') || node.at_css('.nickname'))
            entry['name'] = n.content.strip
          end

          if n = node.at_css('.email')
            text = n.attr(:href) || n.content.strip
            text = text.sub(/^mailto\:/i, '')
            entry['email'] = text
          end

          if n = node.at_css('.website') || node.at_css('.uri') || node.at_css('.url')
            text = n.attr(:href) || n.content.strip
            entry['website'] = text
          end

          data['authors'] << entry if entry['name']
        end
      end

      #
      # Class is `iorg`. Entries without a name are skipped.
      #
      def load_html_organizations(doc, data)
        nodes = doc.css('.iorg')
        return if nodes.nil? || nodes.empty?

        data['organizations'] ||= []

        nodes.each do |node|
          entry = {}

          if n = node.at_css('.name')
            entry['name'] = n.content.strip
          end

          if n = node.at_css('.email')
            text = n.attr(:href) || n.content.strip
            text = text.sub(/^mailto\:/i, '')
            entry['email'] = text
          end

          if n = node.at_css('.website') || node.at_css('.uri') || node.at_css('.url')
            text = n.attr(:href) || n.content.strip
            entry['website'] = text
          end

          data['organizations'] << entry if entry['name']
        end
      end

      #
      # Class is `irepo`. Elements without an `href` are skipped.
      #
      def load_html_repositories(doc, data)
        nodes = doc.css('.irepo')
        return if nodes.nil? || nodes.empty?

        data['repositories'] ||= []

        nodes.each do |node|
          entry = {}

          entry['uri']   = node.attr('href')
          entry['type']  = node.attr('name') || node.attr('title')  # best choice for this?
          entry['label'] = node.content.strip

          # Repositories go into their own list, not into 'resources'.
          data['repositories'] << entry if entry['uri']
        end
      end

      #
      # Class is `icopyright`. Holder, year and license are read from
      # nested `.holder`, `.year` and `.license` elements; a trailing
      # "license" word is dropped from the license text.
      #
      def load_html_copyrights(doc, data)
        nodes = doc.css('.icopyright')
        return if nodes.nil? || nodes.empty?

        data['copyrights'] ||= []

        nodes.each do |node|
          entry = {}

          if n = node.at_css('.holder')
            entry['holder'] = n.content.strip
          end

          if n = node.at_css('.year')
            entry['year'] = n.content.strip
          end

          if n = node.at_css('.license')
            text = n.content.strip
            text = text.sub(/license$/i,'').strip
            entry['license'] = text
          end

          data['copyrights'] << entry
        end
      end

    end

    # Include HTMLImportation mixin into Importer class.
    include HTMLImportation

  end

end