indexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/.index +54 -0
  2. data/HISTORY.md +9 -0
  3. data/README.md +145 -0
  4. data/bin/index +7 -0
  5. data/data/indexer/r2013/index.kwalify +175 -0
  6. data/data/indexer/r2013/index.yes +172 -0
  7. data/data/indexer/r2013/index.yesi +67 -0
  8. data/data/indexer/r2013/ruby.txt +35 -0
  9. data/data/indexer/r2013/yaml.txt +30 -0
  10. data/lib/indexer.rb +65 -0
  11. data/lib/indexer/attributes.rb +171 -0
  12. data/lib/indexer/command.rb +260 -0
  13. data/lib/indexer/components.rb +8 -0
  14. data/lib/indexer/components/author.rb +140 -0
  15. data/lib/indexer/components/conflict.rb +78 -0
  16. data/lib/indexer/components/copyright.rb +95 -0
  17. data/lib/indexer/components/dependency.rb +18 -0
  18. data/lib/indexer/components/organization.rb +133 -0
  19. data/lib/indexer/components/repository.rb +140 -0
  20. data/lib/indexer/components/requirement.rb +360 -0
  21. data/lib/indexer/components/resource.rb +209 -0
  22. data/lib/indexer/conversion.rb +14 -0
  23. data/lib/indexer/conversion/gemfile.rb +44 -0
  24. data/lib/indexer/conversion/gemspec.rb +114 -0
  25. data/lib/indexer/conversion/gemspec_exporter.rb +304 -0
  26. data/lib/indexer/core_ext.rb +4 -0
  27. data/lib/indexer/error.rb +23 -0
  28. data/lib/indexer/gemfile.rb +75 -0
  29. data/lib/indexer/importer.rb +144 -0
  30. data/lib/indexer/importer/file.rb +94 -0
  31. data/lib/indexer/importer/gemfile.rb +27 -0
  32. data/lib/indexer/importer/gemspec.rb +43 -0
  33. data/lib/indexer/importer/html.rb +289 -0
  34. data/lib/indexer/importer/markdown.rb +45 -0
  35. data/lib/indexer/importer/ruby.rb +47 -0
  36. data/lib/indexer/importer/version.rb +38 -0
  37. data/lib/indexer/importer/yaml.rb +46 -0
  38. data/lib/indexer/loadable.rb +159 -0
  39. data/lib/indexer/metadata.rb +879 -0
  40. data/lib/indexer/model.rb +237 -0
  41. data/lib/indexer/revision.rb +43 -0
  42. data/lib/indexer/valid.rb +287 -0
  43. data/lib/indexer/validator.rb +313 -0
  44. data/lib/indexer/version/constraint.rb +124 -0
  45. data/lib/indexer/version/exceptions.rb +11 -0
  46. data/lib/indexer/version/number.rb +497 -0
  47. metadata +141 -0
@@ -0,0 +1,4 @@
1
+ require 'indexer/core_ext/hash/to_h'
2
+ require 'indexer/core_ext/hash/rekey'
3
+ require 'indexer/core_ext/kernel/cli'
4
+
@@ -0,0 +1,23 @@
1
+ module Indexer
2
+
3
+ # Tag module for Metaspec Exceptions.
4
+ #
5
+ # Use this module to extend arbitrary errors raised by Metaspec,
6
+ # so they can be easily identified as Metaspec errors if need be.
7
+ module Error
8
+ # Just catch the error and raise this instead.
9
+ def self.exception(msg=nil,orig=$!)
10
+ if Class === orig
11
+ orig = orig.new(msg)
12
+ elsif orig.nil?
13
+ orig = StandardError.new(msg)
14
+ else
15
+ orig = orig.exception(msg) if msg
16
+ end
17
+ orig.extend self
18
+ orig
19
+ end
20
+ end
21
+
22
+ end
23
+
@@ -0,0 +1,75 @@
1
+ #module Indexer
2
+
3
+ # Make sure Indexer is loaded.
4
+ require 'indexer' unless defined?(Indexer)
5
+
6
+ require 'bundler'
7
+
8
+ # Bundler integration.
9
+ #
10
+ # This does not support Bundler's `:git` references
11
+ # or `:require` option (at least not yet).
12
+ #
13
+ module Bundler
14
+
15
+ # Mixin for Bundler::Dsl.
16
+ #
17
+ class Dsl
18
+
19
+ #
20
+ # Dynamically update a Gemfile from the `.index` file. Just call `index`
21
+ # from your Gemfile.
22
+ #
23
+ # rubyfile
24
+ #
25
+ # This is analogous to the Gemfile's `gemspec` method.
26
+ #
27
+ def index
28
+ spec = Indexer::Metadata.open
29
+ spec.requirements.each do |req|
30
+ next if req.external?
31
+ gem(req.name, req.version, :group=>req.groups)
32
+ end
33
+ end
34
+
35
+ #
36
+ def metadata
37
+ @metadata ||= Indexer::Metadata.new
38
+ end
39
+
40
+ #
41
+ alias :_method_missing :method_missing
42
+
43
+ #
44
+ # Evaluating on the Builder instance, allows Ruby basic metadata
45
+ # to be built via this method.
46
+ #
47
+ def method_missing(s, *a, &b)
48
+ r = s.to_s.chomp('=')
49
+ case a.size
50
+ when 0
51
+ if metadata.respond_to?(s)
52
+ return metadata.__send__(s, &b)
53
+ end
54
+ when 1
55
+ if metadata.respond_to?("#{r}=")
56
+ return metadata.__send__("#{r}=", *a)
57
+ end
58
+ else
59
+ if metadata.respond_to?("#{r}=")
60
+ return metadata.__send__("#{r}=", a)
61
+ end
62
+ end
63
+
64
+ _method_missing(s, *a, &b)
65
+ #super(s, *a, &b) # if cases don't match-up
66
+ end
67
+
68
+ end
69
+
70
+ end
71
+
72
+ #end
73
+
74
+ #::Bundler::Dsl.__send__(:include, Indexer::Bundler::Dsl)
75
+
@@ -0,0 +1,144 @@
1
+ module Indexer
2
+
3
+ # Import external sources into metadata.
4
+ #
5
+ def self.import(*sources)
6
+ Importer.import(*sources)
7
+ end
8
+
9
+ # Importer class takes disparate data sources and imports them
10
+ # into a Metadata instance.
11
+ #
12
+ # Mixins are used to inject import behavior by overriding the `#import` method.
13
+ # Any such mixin's #import method must call `#super` if its method doesn't
14
+ # apply, allowing the routine to fall back to the other possible import methods.
15
+ #
16
+ class Importer
17
+
18
+ #
19
+ # Require all import mixins.
20
+ #
21
+ def self.require_importers
22
+ require_relative 'importer/file'
23
+ require_relative 'importer/ruby'
24
+ require_relative 'importer/yaml'
25
+ require_relative 'importer/html'
26
+ require_relative 'importer/markdown'
27
+ #require_relative 'importer/rdoc'
28
+ #require_relative 'importer/textile'
29
+ require_relative 'importer/gemspec'
30
+ require_relative 'importer/gemfile'
31
+ require_relative 'importer/version'
32
+ end
33
+
34
+ #
35
+ # Import metadata from external sources.
36
+ #
37
+ def self.import(*source)
38
+ options = (Hash === source.last ? source.pop : {})
39
+
40
+ require_importers
41
+
42
+ #metadata = nil
43
+
44
+ ## use source of current metadata if none given
45
+ ## TODO: Only search the current directory or search up to root?
46
+ if source.empty?
47
+ if file = Dir[LOCK_FILE].first #or `Metadata.exists?` ?
48
+ data = YAML.load_file(file)
49
+ source = Array(data['source'])
50
+ end
51
+ end
52
+
53
+ if source.empty?
54
+ source = [USER_FILE]
55
+ end
56
+
57
+ source.each do |file|
58
+ unless File.exist?(file)
59
+ warn "metadata source file not found - `#{file}'"
60
+ end
61
+ end
62
+
63
+ importer = Importer.new #(metadata)
64
+
65
+ source.each do |src|
66
+ importer.import(src)
67
+ end
68
+
69
+ return importer.metadata
70
+ end
71
+
72
+ #
73
+ # Initialize importer.
74
+ #
75
+ def initialize(metadata=nil)
76
+ @metadata = metadata || Metadata.new
77
+ @file_cache = {}
78
+ end
79
+
80
+ #
81
+ # Metadata being built.
82
+ #
83
+ attr :metadata
84
+
85
+ #
86
+ #
87
+ #
88
+ def import(source)
89
+ success = super(source) if defined?(super)
90
+ if success
91
+ metadata.sources << source unless metadata.sources.include?(source)
92
+ else
93
+ raise "metadata source not found or not a known type -- #{source}"
94
+ end
95
+ end
96
+
97
+ #
98
+ # Provides a file contents cache. This is used by the YAMLImportation
99
+ # script, for instance, to see if the file begins with `---`, in
100
+ # which case the file is taken to be YAML format, even if the
101
+ # file's extension is not `.yml` or `.yaml`.
102
+ #
103
+ def read(file)
104
+ @file_cache[file] ||= File.read(file)
105
+ end
106
+
107
+ #
108
+ # Evaluating on the Importer instance, allows Ruby basic metadata
109
+ # to be built via this method.
110
+ #
111
+ def method_missing(s, *a, &b)
112
+ return if s == :import
113
+
114
+ r = s.to_s.chomp('=')
115
+ case a.size
116
+ when 0
117
+ if metadata.respond_to?(s)
118
+ return metadata.__send__(s, &b)
119
+ end
120
+ when 1
121
+ if metadata.respond_to?("#{r}=")
122
+ return metadata.__send__("#{r}=", *a)
123
+ end
124
+ else
125
+ if metadata.respond_to?("#{r}=")
126
+ return metadata.__send__("#{r}=", a)
127
+ end
128
+ end
129
+
130
+ super(s, *a, &b) # if cases don't match-up
131
+ end
132
+
133
+ #
134
+ # Is `text` a YAML document? It determines this simply
135
+ # by checking for `---` at the top of the text.
136
+ #
137
+ # @todo Ignore top comments.
138
+ #
139
+ def yaml?(text)
140
+ text =~ /\A(---|%TAG|%YAML)/
141
+ end
142
+ end
143
+
144
+ end
@@ -0,0 +1,94 @@
1
+ module Indexer
2
+
3
+ class Importer
4
+
5
+ # Import metadata from individual files.
6
+ #
7
+ module FileImportation
8
+
9
+ #
10
+ # Files import procedure.
11
+ #
12
+ def import(source)
13
+ if File.directory?(source)
14
+ load_directory(source)
15
+ true
16
+ else
17
+ super(source) if defined?(super)
18
+ end
19
+ end
20
+
21
+ #
22
+ # Import files from a given directory. This will only import files
23
+ # that have a name corresponding to a metadata attribute, unless
24
+ # the file is listed in a `.index_extra` file within the directory.
25
+ #
26
+ # However, files with an extension of `.yml` or `.yaml` will be loaded
27
+ # wholesale and not as a single attribute.
28
+ #
29
+ # @todo Subdirectories are simply omitted. Maybe do otherwise in future?
30
+ #
31
def load_directory(folder)
  # Import every regular file found directly inside `folder`:
  #
  # * files ending in `.yaml`/`.yml` are handed to `load_yaml`;
  # * files matched by a pattern listed in `<folder>/.index_extra`, or whose
  #   downcased name is one of `metadata.attributes`, are handed to
  #   `load_field_file`.
  #
  # Subdirectories are skipped. Returns nil when `folder` is not a directory.
  return unless File.directory?(folder)

  # Collect the paths selected by `.index_extra` (one glob pattern per line,
  # blank lines ignored), normalised to absolute paths.
  extra = []
  extra_file = File.join(folder, '.index_extra')
  if File.exist?(extra_file)
    patterns = File.read(extra_file).split("\n").map { |pattern| pattern.strip }
    patterns = patterns.reject { |pattern| pattern.empty? }
    extra = patterns.map { |pattern| Dir[File.join(folder, pattern)] }.flatten
    extra = extra.map { |path| File.expand_path(path) }
  end

  Dir[File.join(folder, '*')].each do |file|
    next if File.directory?(file)
    name = File.basename(file).downcase
    next load_yaml(file) if %w{.yaml .yml}.include?(File.extname(file))
    # `extra` holds globbed paths, not bare file names, so compare full
    # (normalised) paths; comparing against `name` could never match.
    next load_field_file(file) if extra.include?(File.expand_path(file))
    next load_field_file(file) if metadata.attributes.include?(name.to_sym)
  end
end
51
+
52
+ #
53
+ # Import a field setting from a file.
54
+ #
55
+ # TODO: Ultimately support JSON and maybe other types, and possibly
56
+ # use mime-types library to recognize them.
57
+ #
58
+ def load_field_file(file)
59
+ if File.directory?(file)
60
+ # ...
61
+ else
62
+ case File.extname(file).downcase
63
+ when '.yaml', '.yml'
64
+ name = File.basename(file).downcase
65
+ name = name.chomp('.yaml').chomp('.yml')
66
+ metadata[name] = YAML.load_file(file)
67
+ # TODO: should yaml files with explicit extension be merged instead?
68
+ #metadata.merge!(YAML.load_file(file))
69
+ when '.text', '.txt'
70
+ name = File.basename(file).downcase
71
+ name = name.chomp('.text').chomp('.txt')
72
+ text = File.read(file)
73
+ metadata[name] = text.strip
74
+ else
75
+ text = File.read(file)
76
+ if /\A---/ =~ text
77
+ name = File.basename(file).downcase
78
+ metadata[name] = YAML.load(text)
79
+ else
80
+ name = File.basename(file).downcase
81
+ metadata[name] = text.strip
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ end
88
+
89
+ # Include FileImportation mixin into Importer class.
90
+ include FileImportation
91
+
92
+ end
93
+
94
+ end
@@ -0,0 +1,27 @@
1
+ module Indexer
2
+
3
+ class Importer
4
+
5
+ # Build mixin for Bundler's Gemfile.
6
+ #
7
+ module GemfileImportation
8
+ #
9
+ # If the source file is a Gemfile, import it.
10
+ #
11
+ def import(source)
12
+ case source
13
+ when 'Gemfile'
14
+ metadata.import_gemfile(source)
15
+ true
16
+ else
17
+ super(source) if defined?(super)
18
+ end
19
+ end
20
+ end
21
+
22
+ # Include GemfileImportation mixin into Importer class.
23
+ include GemfileImportation
24
+
25
+ end
26
+
27
+ end
@@ -0,0 +1,43 @@
1
+ module Indexer
2
+
3
+ class Importer
4
+
5
+ # It is not recommended that a .gemspec be the usual source of metadata.
6
+ # Rather, it is recommended that the gemspec be produced from the metadata
7
+ # instead. (Rumber's metadata covers almost every aspect of a gemspec, and
8
+ # a gemspec can be augmented where needed.) Nonetheless, a gemspec can serve
9
+ # as a good source for creating an initial metadata file.
10
+ #
11
+ module GemspecImportation
12
+
13
+ #
14
+ # If the source file is a gemspec, import it.
15
+ #
16
+ def import(source)
17
+ case File.extname(source)
18
+ when '.gemspec'
19
+ # TODO: handle YAML-based gemspecs
20
+ gemspec = ::Gem::Specification.load(source)
21
+ metadata.import_gemspec(gemspec)
22
+ true
23
+ else
24
+ super(source) if defined?(super)
25
+ end
26
+ end
27
+
28
+ #
29
+ #def local_files(root, glob, *flags)
30
+ # bits = flags.map{ |f| File.const_get("FNM_#{f.to_s.upcase}") }
31
+ # files = Dir.glob(File.join(root,glob), bits)
32
+ # files = files.map{ |f| f.sub(root,'') }
33
+ # files
34
+ #end
35
+ end
36
+
37
+ # Include GemspecImportation mixin into Importer class.
38
+ include GemspecImportation
39
+
40
+ end
41
+
42
+ end
43
+
@@ -0,0 +1,289 @@
1
+ module Indexer
2
+
3
+ class Importer
4
+
5
+ # Import metadata from a HTML source using microformats.
6
+ #
7
+ # NOTE: The implementation using css selectors is fairly slow.
8
+ # If we ever think it important to speed up, then we might
9
+ # try traversing instead.
10
+ #
11
+ module HTMLImportation
12
+
13
+ #
14
+ # HTML import procedure.
15
+ #
16
+ def import(source)
17
+ if File.file?(source)
18
+ case File.extname(source)
19
+ when '.html'
20
+ load_html(source)
21
+ return true
22
+ end
23
+ end
24
+ super(source) if defined?(super)
25
+ end
26
+
27
+ #
28
+ # Import metadata from HTML file.
29
+ #
30
def load_html(file)
  # Parse `file` with Nokogiri, collect microformat fields through the
  # `load_html_*` helpers and merge the result into `metadata`.
  #
  # `file` may be an already-parsed Nokogiri::XML::Document, an open File,
  # or anything else accepted by `File.open` (typically a path String).
  require 'nokogiri'

  doc = case file
        when Nokogiri::XML::Document
          file
        when File
          # The caller owns this handle; leave it open.
          Nokogiri::HTML(file)
        else
          # Block form closes the handle once parsing is done, instead of
          # leaking the descriptor opened here.
          File.open(file) { |io| Nokogiri::HTML(io) }
        end

  data = {}

  %w{version summary description created}.each do |field|
    load_html_simple(field, doc, data)
  end

  load_html_name(doc, data)
  load_html_title(doc, data)
  load_html_authors(doc, data)
  load_html_organizations(doc, data)
  load_html_requirements(doc, data)
  load_html_resources(doc, data)
  load_html_repositories(doc, data)
  load_html_copyrights(doc, data)
  load_html_categories(doc, data)

  metadata.merge!(data)
end
60
+
61
+ #
62
+ # Load a simple field value.
63
+ #
64
+ def load_html_simple(field, doc, data)
65
+ nodes = doc.css(".i#{field}")
66
+ return if (nodes.nil? or nodes.empty?)
67
+ text = nodes.first.content.strip
68
+ data[field] = text
69
+ end
70
+
71
+ #
72
+ # Load name, and use it for title too if not already set.
73
+ #
74
+ def load_html_name(doc, data)
75
+ nodes = doc.css(".iname")
76
+ return if (nodes.nil? or nodes.empty?)
77
+ text = nodes.first.content.strip
78
+
79
+ unless metadata.title
80
+ data['title'] = text.capitalize
81
+ end
82
+
83
+ data['name'] = text
84
+ end
85
+
86
+ #
87
+ # Load title, and use it for name too if not already set.
88
+ #
89
+ def load_html_title(doc, data)
90
+ nodes = doc.css(".ititle")
91
+ return if (nodes.nil? or nodes.empty?)
92
+ text = nodes.first.content.strip
93
+
94
+ unless metadata.name
95
+ data['name'] = text.downcase.gsub(/\s+/, '_')
96
+ end
97
+
98
+ data['title'] = text
99
+ end
100
+
101
+ #
102
+ #
103
+ #
104
+ def load_html_categories(doc, data)
105
+ nodes = doc.css('.icategory')
106
+ return if (nodes.nil? or nodes.empty?)
107
+
108
+ data['categories'] ||= []
109
+
110
+ nodes.each do |node|
111
+ entry = node.content.strip
112
+ data['categories'] << entry unless entry == ""
113
+ end
114
+ end
115
+
116
+ #
117
+ #
118
+ #
119
+ def load_html_resources(doc, data)
120
+ nodes = doc.css('.iresource')
121
+ return if (nodes.nil? or nodes.empty?)
122
+
123
+ data['resources'] ||= []
124
+
125
+ nodes.each do |node|
126
+ entry = {}
127
+
128
+ entry['uri'] = node.attr('href')
129
+ entry['type'] = node.attr('name') || node.attr('title') # best choice for this?
130
+ entry['label'] = node.content.strip
131
+
132
+ data['resources'] << entry if entry['uri']
133
+ end
134
+ end
135
+
136
+ #
137
+ #
138
+ #
139
+ def load_html_requirements(doc, data)
140
+ nodes = doc.css('.irequirement')
141
+ return if (nodes.nil? or nodes.empty?)
142
+
143
+ data['requirements'] ||= []
144
+
145
+ nodes.each do |node|
146
+ entry = {}
147
+
148
+ if n = node.at_css('.name')
149
+ entry['name'] = n.content.strip
150
+ end
151
+
152
+ if n = node.at_css('.version')
153
+ entry['version'] = n.content.strip
154
+ end
155
+
156
+ if n = (node.at_css('.groups') || node.at_css('.group'))
157
+ text = n.content.strip
158
+ text = text.sub(/^[(]/, '').sub(/[)]$/, '').strip
159
+ entry['groups'] = text.split(/\s+/)
160
+
161
+ if %w{test build document development}.any?{ |g| entry['groups'].include?(g) }
162
+ entry['development'] = true
163
+ end
164
+ end
165
+
166
+ data['requirements'] << entry if entry['name']
167
+ end
168
+ end
169
+
170
+ #
171
+ # Class is `iauthor`.
172
+ #
173
+ def load_html_authors(doc, data)
174
+ nodes = doc.css('.iauthor')
175
+ return if (nodes.nil? or nodes.empty?)
176
+
177
+ data['authors'] ||= []
178
+
179
+ nodes.each do |node|
180
+ entry = {}
181
+
182
+ if n = (node.at_css('.name') || node.at_css('.nickname'))
183
+ entry['name'] = n.content.strip
184
+ end
185
+
186
+ if n = node.at_css('.email')
187
+ text = n.attr(:href) || n.content.strip
188
+ text = text.sub(/^mailto\:/i, '')
189
+ entry['email'] = text
190
+ end
191
+
192
+ if n = node.at_css('.website') || node.at_css('.uri') || node.at_css('.url')
193
+ text = n.attr(:href) || n.content.strip
194
+ entry['website'] = text
195
+ end
196
+
197
+ data['authors'] << entry if entry['name']
198
+ end
199
+ end
200
+
201
+ #
202
+ # Class is `iorg`.
203
+ #
204
+ def load_html_organizations(doc, data)
205
+ nodes = doc.css('.iorg')
206
+ return if (nodes.nil? or nodes.empty?)
207
+
208
+ data['organizations'] ||= []
209
+
210
+ nodes.each do |node|
211
+ entry = {}
212
+
213
+ if n = node.at_css('.name')
214
+ entry['name'] = n.content.strip
215
+ end
216
+
217
+ if n = node.at_css('.email')
218
+ text = n.attr(:href) || n.content.strip
219
+ text = text.sub(/^mailto\:/i, '')
220
+ entry['email'] = text
221
+ end
222
+
223
+ if n = node.at_css('.website') || node.at_css('.uri') || node.at_css('.url')
224
+ text = n.attr(:href) || n.content.strip
225
+ entry['website'] = text
226
+ end
227
+
228
+ data['organizations'] << entry if entry['name']
229
+ end
230
+ end
231
+
232
+ #
233
+ # Class is `irepo`.
234
+ #
235
def load_html_repositories(doc, data)
  # Collect every `.irepo` node of `doc` into data['repositories'].
  # Each entry records the node's `href` as 'uri', its `name` (or else
  # `title`) attribute as 'type', and its stripped text as 'label'.
  # Nodes without an `href` are skipped. Does nothing when no node matches.
  nodes = doc.css('.irepo')
  return if nodes.nil? || nodes.empty?

  data['repositories'] ||= []

  nodes.each do |node|
    entry = {
      'uri'   => node.attr('href'),
      'type'  => node.attr('name') || node.attr('title'), # best choice for this?
      'label' => node.content.strip
    }

    # Append to the list initialised above; appending to data['resources']
    # would fail when that key is unset and would misfile the entry otherwise.
    data['repositories'] << entry if entry['uri']
  end
end
251
+
252
+ #
253
+ #
254
+ #
255
def load_html_copyrights(doc, data)
  # Collect every `.icopyright` node of `doc` into data['copyrights'].
  # Each entry may carry 'holder', 'year' and 'license', read from the
  # node's `.holder`, `.year` and `.license` children; a trailing
  # "license" word is trimmed from the license text.
  nodes = doc.css('.icopyright')
  return if nodes.nil? || nodes.empty?

  data['copyrights'] ||= []

  nodes.each do |node|
    entry = {}

    if (holder = node.at_css('.holder'))
      entry['holder'] = holder.content.strip
    end

    if (year = node.at_css('.year'))
      entry['year'] = year.content.strip
    end

    if (license = node.at_css('.license'))
      entry['license'] = license.content.strip.sub(/license$/i, '').strip
    end

    # Like the other loaders, skip nodes that yielded nothing usable rather
    # than recording an empty hash.
    data['copyrights'] << entry unless entry.empty?
  end
end
281
+
282
+ end
283
+
284
+ # Include HTMLImportation mixin into Importer class.
285
+ include HTMLImportation
286
+
287
+ end
288
+
289
+ end