right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,69 +21,72 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scanners
26
- # Union scanner, to permit running multiple scanners while only
27
- # walking the fs once.
28
- class Union
29
- # Create a new union scanner. Recognizes no new options.
30
- #
31
- # === Parameters
32
- # classes(List):: List of Scanner classes to run
33
- # options(Hash):: scanner options
34
- def initialize(classes, options={})
35
- @subscanners = classes.map {|klass| klass.new(options)}
36
- end
24
+ # ancestor
25
+ require 'right_scraper/scanners'
37
26
 
38
- # Notify subscanners that all scans have completed.
39
- def finish
40
- @subscanners.each {|scanner| scanner.finish}
41
- end
27
+ module RightScraper::Scanners
42
28
 
43
- # Begin a scan for the given resource.
44
- #
45
- # === Parameters
46
- # resource(RightScraper::Resource::Base):: resource to scan
47
- def begin(resource)
48
- @subscanners.each {|scanner| scanner.begin(resource)}
49
- end
29
+ # Union scanner, to permit running multiple scanners while only
30
+ # walking the fs once.
31
+ class Union
50
32
 
51
- # Finish a scan for the given resource.
52
- #
53
- # === Parameters
54
- # resource(RightScraper::Resource::Base):: resource that just finished scanning
55
- def end(resource)
56
- @subscanners.each {|scanner| scanner.end(resource)}
57
- end
33
+ # Create a new union scanner. Recognizes no new options.
34
+ #
35
+ # === Parameters
36
+ # classes(List):: List of Scanner classes to run
37
+ # options(Hash):: scanner options
38
+ def initialize(classes, options={})
39
+ @subscanners = classes.map {|klass| klass.new(options)}
40
+ end
41
+
42
+ # Notify subscanners that all scans have completed.
43
+ def finish
44
+ @subscanners.each {|scanner| scanner.finish}
45
+ end
46
+
47
+ # Begin a scan for the given resource.
48
+ #
49
+ # === Parameters
50
+ # resource(RightScraper::Resource::Base):: resource to scan
51
+ def begin(resource)
52
+ @subscanners.each {|scanner| scanner.begin(resource)}
53
+ end
54
+
55
+ # Finish a scan for the given resource.
56
+ #
57
+ # === Parameters
58
+ # resource(RightScraper::Resource::Base):: resource that just finished scanning
59
+ def end(resource)
60
+ @subscanners.each {|scanner| scanner.end(resource)}
61
+ end
58
62
 
59
- # Notice a file during scanning.
60
- #
61
- # === Block
62
- # Return the data for this file. We use a block because it may
63
- # not always be necessary to read the data.
64
- #
65
- # === Parameters
66
- # relative_position(String):: relative pathname for the file from the root of resource
67
- def notice(relative_position)
68
- data = nil
69
- @subscanners.each {|scanner| scanner.notice(relative_position) {
70
- data = yield if data.nil?
71
- data
72
- }
63
+ # Notice a file during scanning.
64
+ #
65
+ # === Block
66
+ # Return the data for this file. We use a block because it may
67
+ # not always be necessary to read the data.
68
+ #
69
+ # === Parameters
70
+ # relative_position(String):: relative pathname for the file from the root of resource
71
+ def notice(relative_position)
72
+ data = nil
73
+ @subscanners.each {|scanner| scanner.notice(relative_position) {
74
+ data = yield if data.nil?
75
+ data
73
76
  }
74
- end
77
+ }
78
+ end
75
79
 
76
- # Notice a directory during scanning. Returns true if any of the
77
- # subscanners report that they should recurse into the directory.
78
- #
79
- # === Parameters
80
- # relative_position(String):: relative pathname for directory from root of resource
81
- #
82
- # === Returns
83
- # Boolean:: should the scanning recurse into the directory
84
- def notice_dir(relative_position)
85
- @subscanners.any? {|scanner| scanner.notice_dir(relative_position)}
86
- end
80
+ # Notice a directory during scanning. Returns true if any of the
81
+ # subscanners report that they should recurse into the directory.
82
+ #
83
+ # === Parameters
84
+ # relative_position(String):: relative pathname for directory from root of resource
85
+ #
86
+ # === Returns
87
+ # Boolean:: should the scanning recurse into the directory
88
+ def notice_dir(relative_position)
89
+ @subscanners.any? {|scanner| scanner.notice_dir(relative_position)}
87
90
  end
88
91
  end
89
92
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,66 +21,67 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
25
27
  require 'digest/sha1'
26
28
 
27
- module RightScraper
28
- module Scanners
29
- # Build manifests from a filesystem.
30
- class WorkflowManifest < Base
31
- # Create a new manifest scanner. Does not accept any new arguments.
32
- def initialize(*args)
33
- super
34
- @manifest = {}
35
- end
29
+ module RightScraper::Scanners
36
30
 
37
- # Retrieve relative workflow files positions
38
- #
39
- # === Parameters
40
- # workflow(Resources::Workflow):: Workflow whose manifest is being built
41
- def begin(workflow)
42
- @workflow = workflow
43
- @metadata_filename = File.basename(@workflow.metadata_path)
44
- @definition_filename = File.basename(@workflow.definition_path)
45
- end
31
+ # Build manifests from a filesystem.
32
+ class WorkflowManifest < ::RightScraper::Scanners::Base
33
+ # Create a new manifest scanner. Does not accept any new arguments.
34
+ def initialize(*args)
35
+ super
36
+ @manifest = {}
37
+ end
46
38
 
47
- # Complete a scan for the given resource.
48
- #
49
- # === Parameters ===
50
- # resource(RightScraper::Resources::Base):: resource to scan
51
- def end(resource)
52
- resource.manifest = @manifest
53
- @manifest = {}
54
- end
39
+ # Retrieve relative workflow files positions
40
+ #
41
+ # === Parameters
42
+ # workflow(Resources::Workflow):: Workflow whose manifest is being built
43
+ def begin(workflow)
44
+ @workflow = workflow
45
+ @metadata_filename = File.basename(@workflow.metadata_path)
46
+ @definition_filename = File.basename(@workflow.definition_path)
47
+ end
55
48
 
56
- # Notice a file during scanning.
57
- #
58
- # === Block ===
59
- # Return the data for this file. We use a block because it may
60
- # not always be necessary to read the data.
61
- #
62
- # === Parameters ===
63
- # relative_position(String):: relative pathname for file from root of resource
64
- def notice(relative_position)
65
- if [ @metadata_filename, @definition_filename ].include?(relative_position)
66
- @manifest[relative_position] = Digest::SHA1.hexdigest(yield)
67
- end
68
- end
49
+ # Complete a scan for the given resource.
50
+ #
51
+ # === Parameters ===
52
+ # resource(RightScraper::Resources::Base):: resource to scan
53
+ def end(resource)
54
+ resource.manifest = @manifest
55
+ @manifest = {}
56
+ end
69
57
 
70
- # Notice a directory during scanning. Since the workflow definition and
71
- # metadata live in the root directory we don't need to recurse,
72
- # but we do need to go into the first directory (identified by
73
- # +relative_position+ being +nil+).
74
- #
75
- # === Parameters
76
- # relative_position(String):: relative pathname for the directory from root of workflow
77
- #
78
- # === Returns
79
- # Boolean:: should the scanning recurse into the directory
80
- def notice_dir(relative_position)
81
- relative_position == nil
58
+ # Notice a file during scanning.
59
+ #
60
+ # === Block ===
61
+ # Return the data for this file. We use a block because it may
62
+ # not always be necessary to read the data.
63
+ #
64
+ # === Parameters ===
65
+ # relative_position(String):: relative pathname for file from root of resource
66
+ def notice(relative_position)
67
+ if [ @metadata_filename, @definition_filename ].include?(relative_position)
68
+ @manifest[relative_position] = Digest::SHA1.hexdigest(yield)
82
69
  end
83
-
84
70
  end
71
+
72
+ # Notice a directory during scanning. Since the workflow definition and
73
+ # metadata live in the root directory we don't need to recurse,
74
+ # but we do need to go into the first directory (identified by
75
+ # +relative_position+ being +nil+).
76
+ #
77
+ # === Parameters
78
+ # relative_position(String):: relative pathname for the directory from root of workflow
79
+ #
80
+ # === Returns
81
+ # Boolean:: should the scanning recurse into the directory
82
+ def notice_dir(relative_position)
83
+ relative_position == nil
84
+ end
85
+
85
86
  end
86
87
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,50 +21,52 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
24
27
  require 'json'
25
28
 
26
- module RightScraper
27
- module Scanners
28
- # Load workflow metadata from a filesystem.
29
- class WorkflowMetadata < Base
30
- # Begin a scan for the given workflow.
31
- #
32
- # === Parameters
33
- # workflow(RightScraper::Resources::Workflow):: workflow to scan
34
- def begin(workflow)
35
- @workflow = workflow
36
- @metadata_filename = File.basename(workflow.metadata_path)
37
- end
29
+ module RightScraper::Scanners
30
+
31
+ # Load workflow metadata from a filesystem.
32
+ class WorkflowMetadata < ::RightScraper::Scanners::Base
33
+ # Begin a scan for the given workflow.
34
+ #
35
+ # === Parameters
36
+ # workflow(RightScraper::Resources::Workflow):: workflow to scan
37
+ def begin(workflow)
38
+ @workflow = workflow
39
+ @metadata_filename = File.basename(workflow.metadata_path)
40
+ end
38
41
 
39
- # Notice a file during scanning.
40
- #
41
- # === Block
42
- # Return the data for this file. We use a block because it may
43
- # not always be necessary to read the data.
44
- #
45
- # === Parameters
46
- # relative_position(String):: relative pathname for the file from root of workflow
47
- def notice(relative_position)
48
- if relative_position == @metadata_filename
49
- @logger.operation(:metadata_parsing) do
50
- @workflow.metadata = JSON.parse(yield)
51
- end
42
+ # Notice a file during scanning.
43
+ #
44
+ # === Block
45
+ # Return the data for this file. We use a block because it may
46
+ # not always be necessary to read the data.
47
+ #
48
+ # === Parameters
49
+ # relative_position(String):: relative pathname for the file from root of workflow
50
+ def notice(relative_position)
51
+ if relative_position == @metadata_filename
52
+ @logger.operation(:metadata_parsing) do
53
+ @workflow.metadata = JSON.parse(yield)
52
54
  end
53
55
  end
56
+ end
54
57
 
55
- # Notice a directory during scanning. Since the workflow definition and
56
- # metadata live in the root directory we don't need to recurse,
57
- # but we do need to go into the first directory (identified by
58
- # +relative_position+ being +nil+).
59
- #
60
- # === Parameters
61
- # relative_position(String):: relative pathname for the directory from root of workflow
62
- #
63
- # === Returns
64
- # Boolean:: should the scanning recurse into the directory
65
- def notice_dir(relative_position)
66
- relative_position == nil
67
- end
58
+ # Notice a directory during scanning. Since the workflow definition and
59
+ # metadata live in the root directory we don't need to recurse,
60
+ # but we do need to go into the first directory (identified by
61
+ # +relative_position+ being +nil+).
62
+ #
63
+ # === Parameters
64
+ # relative_position(String):: relative pathname for the directory from root of workflow
65
+ #
66
+ # === Returns
67
+ # Boolean:: should the scanning recurse into the directory
68
+ def notice_dir(relative_position)
69
+ relative_position == nil
68
70
  end
69
71
  end
70
72
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -20,65 +20,69 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
+
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
23
27
  require 'right_aws'
24
28
  require 'json'
25
29
 
26
- module RightScraper
27
- module Scanners
28
- # Upload workflow definition and metadata to an S3 bucket.
29
- class WorkflowS3Upload < Base
30
- # Create a new S3Upload. In addition to the options recognized
31
- # by Scanner, this class recognizes <tt>:s3_key</tt>,
32
- # <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
33
- # of those.
34
- #
35
- # === Options
36
- # <tt>:s3_key</tt>:: Required. S3 access key.
37
- # <tt>:s3_secret</tt>:: Required. S3 secret key.
38
- # <tt>:s3_bucket</tt>:: Required. Bucket to upload workflows to.
39
- #
40
- # === Parameters
41
- # options(Hash):: scanner options
42
- def initialize(options={})
43
- super
44
- s3_key = options.fetch(:s3_key)
45
- s3_secret = options.fetch(:s3_secret)
46
- s3 = RightAws::S3.new(aws_access_key_id=s3_key,
47
- aws_secret_access_key=s3_secret,
48
- :logger => Logger.new)
49
- @bucket = s3.bucket(options.fetch(:s3_bucket))
50
- raise "Need an actual, existing S3 bucket!" if @bucket.nil?
51
- end
30
+ module RightScraper::Scanners
52
31
 
53
- # Upon ending a scan for a workflows, upload the workflows
54
- # contents to S3.
55
- #
56
- # === Parameters
57
- # workflows(RightScraper::Workflows):: Workflow to scan
58
- def end(workflow)
59
- @bucket.put(File.join('Workflows', workflow.resource_hash),
60
- {
61
- :metadata => workflow.metadata,
62
- :manifest => workflow.manifest
63
- }.to_json)
64
- end
32
+ # Upload workflow definition and metadata to an S3 bucket.
33
+ class WorkflowS3Upload < ::RightScraper::Scanners::Base
34
+
35
+ # Create a new S3Upload. In addition to the options recognized
36
+ # by Scanner, this class recognizes <tt>:s3_key</tt>,
37
+ # <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
38
+ # of those.
39
+ #
40
+ # === Options
41
+ # <tt>:s3_key</tt>:: Required. S3 access key.
42
+ # <tt>:s3_secret</tt>:: Required. S3 secret key.
43
+ # <tt>:s3_bucket</tt>:: Required. Bucket to upload workflows to.
44
+ #
45
+ # === Parameters
46
+ # options(Hash):: scanner options
47
+ def initialize(options={})
48
+ super
49
+ s3_key = options.fetch(:s3_key)
50
+ s3_secret = options.fetch(:s3_secret)
51
+ s3 = RightAws::S3.new(aws_access_key_id=s3_key,
52
+ aws_secret_access_key=s3_secret,
53
+ :logger => @logger)
54
+ @bucket = s3.bucket(options.fetch(:s3_bucket))
55
+ raise "Need an actual, existing S3 bucket!" if @bucket.nil?
56
+ end
57
+
58
+ # Upon ending a scan for a workflows, upload the workflows
59
+ # contents to S3.
60
+ #
61
+ # === Parameters
62
+ # workflows(RightScraper::Workflows):: Workflow to scan
63
+ def end(workflow)
64
+ @bucket.put(File.join('Workflows', workflow.resource_hash),
65
+ {
66
+ :metadata => workflow.metadata,
67
+ :manifest => workflow.manifest
68
+ }.to_json)
69
+ end
65
70
 
66
- # Upload a file during scanning.
67
- #
68
- # === Block
69
- # Return the data for this file. We use a block because it may
70
- # not always be necessary to read the data.
71
- #
72
- # === Parameters
73
- # relative_position(String):: relative pathname for file from root of cookbook
74
- def notice(relative_position)
75
- # TBD: Only uplad definition and metadata, will there be more files?
76
- contents = yield
77
- name = Digest::SHA1.hexdigest(contents)
78
- path = File.join('Files', name)
79
- unless @bucket.key(path).exists?
80
- @bucket.put(path, contents)
81
- end
71
+ # Upload a file during scanning.
72
+ #
73
+ # === Block
74
+ # Return the data for this file. We use a block because it may
75
+ # not always be necessary to read the data.
76
+ #
77
+ # === Parameters
78
+ # relative_position(String):: relative pathname for file from root of cookbook
79
+ def notice(relative_position)
80
+ # TBD: Only uplad definition and metadata, will there be more files?
81
+ contents = yield
82
+ name = Digest::SHA1.hexdigest(contents)
83
+ path = File.join('Files', name)
84
+ unless @bucket.key(path).exists?
85
+ @bucket.put(path, contents)
82
86
  end
83
87
  end
84
88
  end