right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,69 +21,72 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scanners
26
- # Union scanner, to permit running multiple scanners while only
27
- # walking the fs once.
28
- class Union
29
- # Create a new union scanner. Recognizes no new options.
30
- #
31
- # === Parameters
32
- # classes(List):: List of Scanner classes to run
33
- # options(Hash):: scanner options
34
- def initialize(classes, options={})
35
- @subscanners = classes.map {|klass| klass.new(options)}
36
- end
24
+ # ancestor
25
+ require 'right_scraper/scanners'
37
26
 
38
- # Notify subscanners that all scans have completed.
39
- def finish
40
- @subscanners.each {|scanner| scanner.finish}
41
- end
27
+ module RightScraper::Scanners
42
28
 
43
- # Begin a scan for the given resource.
44
- #
45
- # === Parameters
46
- # resource(RightScraper::Resource::Base):: resource to scan
47
- def begin(resource)
48
- @subscanners.each {|scanner| scanner.begin(resource)}
49
- end
29
+ # Union scanner, to permit running multiple scanners while only
30
+ # walking the fs once.
31
+ class Union
50
32
 
51
- # Finish a scan for the given resource.
52
- #
53
- # === Parameters
54
- # resource(RightScraper::Resource::Base):: resource that just finished scanning
55
- def end(resource)
56
- @subscanners.each {|scanner| scanner.end(resource)}
57
- end
33
+ # Create a new union scanner. Recognizes no new options.
34
+ #
35
+ # === Parameters
36
+ # classes(List):: List of Scanner classes to run
37
+ # options(Hash):: scanner options
38
+ def initialize(classes, options={})
39
+ @subscanners = classes.map {|klass| klass.new(options)}
40
+ end
41
+
42
+ # Notify subscanners that all scans have completed.
43
+ def finish
44
+ @subscanners.each {|scanner| scanner.finish}
45
+ end
46
+
47
+ # Begin a scan for the given resource.
48
+ #
49
+ # === Parameters
50
+ # resource(RightScraper::Resource::Base):: resource to scan
51
+ def begin(resource)
52
+ @subscanners.each {|scanner| scanner.begin(resource)}
53
+ end
54
+
55
+ # Finish a scan for the given resource.
56
+ #
57
+ # === Parameters
58
+ # resource(RightScraper::Resource::Base):: resource that just finished scanning
59
+ def end(resource)
60
+ @subscanners.each {|scanner| scanner.end(resource)}
61
+ end
58
62
 
59
- # Notice a file during scanning.
60
- #
61
- # === Block
62
- # Return the data for this file. We use a block because it may
63
- # not always be necessary to read the data.
64
- #
65
- # === Parameters
66
- # relative_position(String):: relative pathname for the file from the root of resource
67
- def notice(relative_position)
68
- data = nil
69
- @subscanners.each {|scanner| scanner.notice(relative_position) {
70
- data = yield if data.nil?
71
- data
72
- }
63
+ # Notice a file during scanning.
64
+ #
65
+ # === Block
66
+ # Return the data for this file. We use a block because it may
67
+ # not always be necessary to read the data.
68
+ #
69
+ # === Parameters
70
+ # relative_position(String):: relative pathname for the file from the root of resource
71
+ def notice(relative_position)
72
+ data = nil
73
+ @subscanners.each {|scanner| scanner.notice(relative_position) {
74
+ data = yield if data.nil?
75
+ data
73
76
  }
74
- end
77
+ }
78
+ end
75
79
 
76
- # Notice a directory during scanning. Returns true if any of the
77
- # subscanners report that they should recurse into the directory.
78
- #
79
- # === Parameters
80
- # relative_position(String):: relative pathname for directory from root of resource
81
- #
82
- # === Returns
83
- # Boolean:: should the scanning recurse into the directory
84
- def notice_dir(relative_position)
85
- @subscanners.any? {|scanner| scanner.notice_dir(relative_position)}
86
- end
80
+ # Notice a directory during scanning. Returns true if any of the
81
+ # subscanners report that they should recurse into the directory.
82
+ #
83
+ # === Parameters
84
+ # relative_position(String):: relative pathname for directory from root of resource
85
+ #
86
+ # === Returns
87
+ # Boolean:: should the scanning recurse into the directory
88
+ def notice_dir(relative_position)
89
+ @subscanners.any? {|scanner| scanner.notice_dir(relative_position)}
87
90
  end
88
91
  end
89
92
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,66 +21,67 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
25
27
  require 'digest/sha1'
26
28
 
27
- module RightScraper
28
- module Scanners
29
- # Build manifests from a filesystem.
30
- class WorkflowManifest < Base
31
- # Create a new manifest scanner. Does not accept any new arguments.
32
- def initialize(*args)
33
- super
34
- @manifest = {}
35
- end
29
+ module RightScraper::Scanners
36
30
 
37
- # Retrieve relative workflow files positions
38
- #
39
- # === Parameters
40
- # workflow(Resources::Workflow):: Workflow whose manifest is being built
41
- def begin(workflow)
42
- @workflow = workflow
43
- @metadata_filename = File.basename(@workflow.metadata_path)
44
- @definition_filename = File.basename(@workflow.definition_path)
45
- end
31
+ # Build manifests from a filesystem.
32
+ class WorkflowManifest < ::RightScraper::Scanners::Base
33
+ # Create a new manifest scanner. Does not accept any new arguments.
34
+ def initialize(*args)
35
+ super
36
+ @manifest = {}
37
+ end
46
38
 
47
- # Complete a scan for the given resource.
48
- #
49
- # === Parameters ===
50
- # resource(RightScraper::Resources::Base):: resource to scan
51
- def end(resource)
52
- resource.manifest = @manifest
53
- @manifest = {}
54
- end
39
+ # Retrieve relative workflow files positions
40
+ #
41
+ # === Parameters
42
+ # workflow(Resources::Workflow):: Workflow whose manifest is being built
43
+ def begin(workflow)
44
+ @workflow = workflow
45
+ @metadata_filename = File.basename(@workflow.metadata_path)
46
+ @definition_filename = File.basename(@workflow.definition_path)
47
+ end
55
48
 
56
- # Notice a file during scanning.
57
- #
58
- # === Block ===
59
- # Return the data for this file. We use a block because it may
60
- # not always be necessary to read the data.
61
- #
62
- # === Parameters ===
63
- # relative_position(String):: relative pathname for file from root of resource
64
- def notice(relative_position)
65
- if [ @metadata_filename, @definition_filename ].include?(relative_position)
66
- @manifest[relative_position] = Digest::SHA1.hexdigest(yield)
67
- end
68
- end
49
+ # Complete a scan for the given resource.
50
+ #
51
+ # === Parameters ===
52
+ # resource(RightScraper::Resources::Base):: resource to scan
53
+ def end(resource)
54
+ resource.manifest = @manifest
55
+ @manifest = {}
56
+ end
69
57
 
70
- # Notice a directory during scanning. Since the workflow definition and
71
- # metadata live in the root directory we don't need to recurse,
72
- # but we do need to go into the first directory (identified by
73
- # +relative_position+ being +nil+).
74
- #
75
- # === Parameters
76
- # relative_position(String):: relative pathname for the directory from root of workflow
77
- #
78
- # === Returns
79
- # Boolean:: should the scanning recurse into the directory
80
- def notice_dir(relative_position)
81
- relative_position == nil
58
+ # Notice a file during scanning.
59
+ #
60
+ # === Block ===
61
+ # Return the data for this file. We use a block because it may
62
+ # not always be necessary to read the data.
63
+ #
64
+ # === Parameters ===
65
+ # relative_position(String):: relative pathname for file from root of resource
66
+ def notice(relative_position)
67
+ if [ @metadata_filename, @definition_filename ].include?(relative_position)
68
+ @manifest[relative_position] = Digest::SHA1.hexdigest(yield)
82
69
  end
83
-
84
70
  end
71
+
72
+ # Notice a directory during scanning. Since the workflow definition and
73
+ # metadata live in the root directory we don't need to recurse,
74
+ # but we do need to go into the first directory (identified by
75
+ # +relative_position+ being +nil+).
76
+ #
77
+ # === Parameters
78
+ # relative_position(String):: relative pathname for the directory from root of workflow
79
+ #
80
+ # === Returns
81
+ # Boolean:: should the scanning recurse into the directory
82
+ def notice_dir(relative_position)
83
+ relative_position == nil
84
+ end
85
+
85
86
  end
86
87
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,50 +21,52 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
24
27
  require 'json'
25
28
 
26
- module RightScraper
27
- module Scanners
28
- # Load workflow metadata from a filesystem.
29
- class WorkflowMetadata < Base
30
- # Begin a scan for the given workflow.
31
- #
32
- # === Parameters
33
- # workflow(RightScraper::Resources::Workflow):: workflow to scan
34
- def begin(workflow)
35
- @workflow = workflow
36
- @metadata_filename = File.basename(workflow.metadata_path)
37
- end
29
+ module RightScraper::Scanners
30
+
31
+ # Load workflow metadata from a filesystem.
32
+ class WorkflowMetadata < ::RightScraper::Scanners::Base
33
+ # Begin a scan for the given workflow.
34
+ #
35
+ # === Parameters
36
+ # workflow(RightScraper::Resources::Workflow):: workflow to scan
37
+ def begin(workflow)
38
+ @workflow = workflow
39
+ @metadata_filename = File.basename(workflow.metadata_path)
40
+ end
38
41
 
39
- # Notice a file during scanning.
40
- #
41
- # === Block
42
- # Return the data for this file. We use a block because it may
43
- # not always be necessary to read the data.
44
- #
45
- # === Parameters
46
- # relative_position(String):: relative pathname for the file from root of workflow
47
- def notice(relative_position)
48
- if relative_position == @metadata_filename
49
- @logger.operation(:metadata_parsing) do
50
- @workflow.metadata = JSON.parse(yield)
51
- end
42
+ # Notice a file during scanning.
43
+ #
44
+ # === Block
45
+ # Return the data for this file. We use a block because it may
46
+ # not always be necessary to read the data.
47
+ #
48
+ # === Parameters
49
+ # relative_position(String):: relative pathname for the file from root of workflow
50
+ def notice(relative_position)
51
+ if relative_position == @metadata_filename
52
+ @logger.operation(:metadata_parsing) do
53
+ @workflow.metadata = JSON.parse(yield)
52
54
  end
53
55
  end
56
+ end
54
57
 
55
- # Notice a directory during scanning. Since the workflow definition and
56
- # metadata live in the root directory we don't need to recurse,
57
- # but we do need to go into the first directory (identified by
58
- # +relative_position+ being +nil+).
59
- #
60
- # === Parameters
61
- # relative_position(String):: relative pathname for the directory from root of workflow
62
- #
63
- # === Returns
64
- # Boolean:: should the scanning recurse into the directory
65
- def notice_dir(relative_position)
66
- relative_position == nil
67
- end
58
+ # Notice a directory during scanning. Since the workflow definition and
59
+ # metadata live in the root directory we don't need to recurse,
60
+ # but we do need to go into the first directory (identified by
61
+ # +relative_position+ being +nil+).
62
+ #
63
+ # === Parameters
64
+ # relative_position(String):: relative pathname for the directory from root of workflow
65
+ #
66
+ # === Returns
67
+ # Boolean:: should the scanning recurse into the directory
68
+ def notice_dir(relative_position)
69
+ relative_position == nil
68
70
  end
69
71
  end
70
72
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -20,65 +20,69 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
+
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
23
27
  require 'right_aws'
24
28
  require 'json'
25
29
 
26
- module RightScraper
27
- module Scanners
28
- # Upload workflow definition and metadata to an S3 bucket.
29
- class WorkflowS3Upload < Base
30
- # Create a new S3Upload. In addition to the options recognized
31
- # by Scanner, this class recognizes <tt>:s3_key</tt>,
32
- # <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
33
- # of those.
34
- #
35
- # === Options
36
- # <tt>:s3_key</tt>:: Required. S3 access key.
37
- # <tt>:s3_secret</tt>:: Required. S3 secret key.
38
- # <tt>:s3_bucket</tt>:: Required. Bucket to upload workflows to.
39
- #
40
- # === Parameters
41
- # options(Hash):: scanner options
42
- def initialize(options={})
43
- super
44
- s3_key = options.fetch(:s3_key)
45
- s3_secret = options.fetch(:s3_secret)
46
- s3 = RightAws::S3.new(aws_access_key_id=s3_key,
47
- aws_secret_access_key=s3_secret,
48
- :logger => Logger.new)
49
- @bucket = s3.bucket(options.fetch(:s3_bucket))
50
- raise "Need an actual, existing S3 bucket!" if @bucket.nil?
51
- end
30
+ module RightScraper::Scanners
52
31
 
53
- # Upon ending a scan for a workflows, upload the workflows
54
- # contents to S3.
55
- #
56
- # === Parameters
57
- # workflows(RightScraper::Workflows):: Workflow to scan
58
- def end(workflow)
59
- @bucket.put(File.join('Workflows', workflow.resource_hash),
60
- {
61
- :metadata => workflow.metadata,
62
- :manifest => workflow.manifest
63
- }.to_json)
64
- end
32
+ # Upload workflow definition and metadata to an S3 bucket.
33
+ class WorkflowS3Upload < ::RightScraper::Scanners::Base
34
+
35
+ # Create a new S3Upload. In addition to the options recognized
36
+ # by Scanner, this class recognizes <tt>:s3_key</tt>,
37
+ # <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
38
+ # of those.
39
+ #
40
+ # === Options
41
+ # <tt>:s3_key</tt>:: Required. S3 access key.
42
+ # <tt>:s3_secret</tt>:: Required. S3 secret key.
43
+ # <tt>:s3_bucket</tt>:: Required. Bucket to upload workflows to.
44
+ #
45
+ # === Parameters
46
+ # options(Hash):: scanner options
47
+ def initialize(options={})
48
+ super
49
+ s3_key = options.fetch(:s3_key)
50
+ s3_secret = options.fetch(:s3_secret)
51
+ s3 = RightAws::S3.new(aws_access_key_id=s3_key,
52
+ aws_secret_access_key=s3_secret,
53
+ :logger => @logger)
54
+ @bucket = s3.bucket(options.fetch(:s3_bucket))
55
+ raise "Need an actual, existing S3 bucket!" if @bucket.nil?
56
+ end
57
+
58
+ # Upon ending a scan for a workflow, upload the workflow's
59
+ # contents to S3.
60
+ #
61
+ # === Parameters
62
+ # workflow(RightScraper::Resources::Workflow):: Workflow to scan
63
+ def end(workflow)
64
+ @bucket.put(File.join('Workflows', workflow.resource_hash),
65
+ {
66
+ :metadata => workflow.metadata,
67
+ :manifest => workflow.manifest
68
+ }.to_json)
69
+ end
65
70
 
66
- # Upload a file during scanning.
67
- #
68
- # === Block
69
- # Return the data for this file. We use a block because it may
70
- # not always be necessary to read the data.
71
- #
72
- # === Parameters
73
- # relative_position(String):: relative pathname for file from root of cookbook
74
- def notice(relative_position)
75
- # TBD: Only uplad definition and metadata, will there be more files?
76
- contents = yield
77
- name = Digest::SHA1.hexdigest(contents)
78
- path = File.join('Files', name)
79
- unless @bucket.key(path).exists?
80
- @bucket.put(path, contents)
81
- end
71
+ # Upload a file during scanning.
72
+ #
73
+ # === Block
74
+ # Return the data for this file. We use a block because it may
75
+ # not always be necessary to read the data.
76
+ #
77
+ # === Parameters
78
+ # relative_position(String):: relative pathname for file from root of workflow
79
+ def notice(relative_position)
80
+ # TBD: Only upload definition and metadata, will there be more files?
81
+ contents = yield
82
+ name = Digest::SHA1.hexdigest(contents)
83
+ path = File.join('Files', name)
84
+ unless @bucket.key(path).exists?
85
+ @bucket.put(path, contents)
82
86
  end
83
87
  end
84
88
  end