github_bitbucket_audit 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 892bb28e0de26ae3d42697e84ff19131d6764915
4
+ data.tar.gz: af6a30b4030441d0c8e5b3b6cc064d87bc573618
5
+ SHA512:
6
+ metadata.gz: bf452e796efea3b991e457d1f9ff6e1841ef6ee8569b653732bd5275b8f310920fd22dba71754328346816be27e7dbc351b80f7ae6bda3a9f0c32df1299a7f46
7
+ data.tar.gz: f1a352c1eaa3977cc9a60b0a76e2fb0fbd9557491fb69f3c4fd9716f624d3a0a28f693745de5866d892d4135682480ab7b89ecafedb5307ec70deeb065228452
@@ -0,0 +1 @@
1
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,3 @@
# Fetch gems over HTTPS rather than plain HTTP so that gem downloads are
# encrypted and the registry's identity is verified.
source "https://rubygems.org"

# Dependencies are declared in the gemspec in this directory.
gemspec
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ github_bitbucket_audit (0.0.1)
5
+ github_api (~> 0.11.3)
6
+ semantic_logger (~> 2.7.0)
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ addressable (2.3.5)
12
+ atomic (1.1.16-java)
13
+ coderay (1.1.0)
14
+ descendants_tracker (0.0.4)
15
+ thread_safe (~> 0.3, >= 0.3.1)
16
+ faraday (0.9.0)
17
+ multipart-post (>= 1.2, < 3)
18
+ ffi (1.9.3-java)
19
+ github_api (0.11.3)
20
+ addressable (~> 2.3)
21
+ descendants_tracker (~> 0.0.1)
22
+ faraday (~> 0.8, < 0.10)
23
+ hashie (>= 1.2)
24
+ multi_json (>= 1.7.5, < 2.0)
25
+ nokogiri (~> 1.6.0)
26
+ oauth2
27
+ hashie (2.0.5)
28
+ jwt (0.1.11)
29
+ multi_json (>= 1.5)
30
+ method_source (0.8.2)
31
+ mini_portile (0.5.3)
32
+ multi_json (1.9.2)
33
+ multi_xml (0.5.5)
34
+ multipart-post (2.0.0)
35
+ nokogiri (1.6.1-java)
36
+ mini_portile (~> 0.5.0)
37
+ oauth2 (0.9.3)
38
+ faraday (>= 0.8, < 0.10)
39
+ jwt (~> 0.1.8)
40
+ multi_json (~> 1.3)
41
+ multi_xml (~> 0.5)
42
+ rack (~> 1.2)
43
+ pry (0.9.12.6-java)
44
+ coderay (~> 1.0)
45
+ method_source (~> 0.8)
46
+ slop (~> 3.4)
47
+ spoon (~> 0.0)
48
+ rack (1.5.2)
49
+ semantic_logger (2.7.0)
50
+ sync_attr (>= 1.0)
51
+ thread_safe (>= 0.1.0)
52
+ slop (3.5.0)
53
+ spoon (0.0.4)
54
+ ffi
55
+ sync_attr (1.0.0)
56
+ thread_safe (0.3.1-java)
57
+ atomic (>= 1.1.7, < 2)
58
+
59
+ PLATFORMS
60
+ java
61
+
62
+ DEPENDENCIES
63
+ github_bitbucket_audit!
64
+ pry
@@ -0,0 +1,9 @@
1
+ This gem scans GitHub accounts and checks their public repositories for certain keywords.
2
+
3
+ It takes a list of usernames from a CSV file.
4
+ Goes and fetches the public repos for those users.
5
+ Gets the tree for each of those public repos.
6
+ Checks the filenames against the list of keywords.
7
+ For repos that match keywords, it writes them down in an output file.
8
+
9
+ Usage: gh_bb_audit.rb -u <path_to_user_csv_file> -k <path_to_keywords_csv_file> -o <path_to_output_file>
@@ -0,0 +1,24 @@
#!/usr/bin/env ruby

# Command-line entry point: reads the three file-path options and starts a
# GitHub scan with them.

require_relative '../lib/gh_bb_audit'
require 'optparse'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: gh_bb_audit.rb -u <path_to_user_csv_file> -k <path_to_keywords_csv_file> -o <path_to_output_file>"

  # Each switch takes a mandatory PATH argument. The long forms need a double
  # dash; a single-dash "-users" is parsed as the short switch "-u" plus
  # trailing text, so "--users" was never recognised.
  opts.on("-u", "--users PATH", "CSV file listing the GitHub usernames to scan") do |v|
    options[:user_file_path] = v
  end

  opts.on("-k", "--keywords PATH", "CSV file listing the keywords to look for") do |v|
    options[:keywords_file_path] = v
  end

  opts.on("-o", "--output PATH", "File the flagged repositories are written to") do |v|
    options[:output_file_path] = v
  end
end
parser.parse!

# Stop with the usage text instead of handing nil paths to the scanner.
required_keys = [:user_file_path, :keywords_file_path, :output_file_path]
abort(parser.help) unless required_keys.all? { |key| options[key] }

::GhBbAudit::GithubScanner.new(
  options[:user_file_path],
  options[:keywords_file_path],
  options[:output_file_path]
).start_scan
@@ -0,0 +1,25 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'gh_bb_audit/version'

# Gem specification for github_bitbucket_audit.
Gem::Specification.new do |spec|
  spec.name          = "github_bitbucket_audit"
  # Take the version from the file required above instead of repeating the
  # literal, so the two can never drift apart.
  spec.version       = GhBbAudit::VERSION
  spec.summary       = "Looks for specific keywords in the public repos of github and bitbucket"
  spec.date          = "2014-04-01"
  spec.description   = "The library takes a list of users and searches their public repos for specific keywords"
  spec.authors       = ["Ankur Maheshwari"]
  spec.email         = ["amaheshwari@systango.com"]
  spec.homepage      = "http://systango.com/"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "github_api", "~> 0.11.3"
  spec.add_dependency 'semantic_logger', '~> 2.7.0'

  spec.add_development_dependency "pry"
end
@@ -0,0 +1,16 @@
1
+ require 'semantic_logger' unless defined?(logger)
2
+ require 'github_api'
3
+
# Top-level namespace of the library.
module GhBbAudit
  # Parent directory of the directory that contains this file.
  GEM_ROOT = File.join(File.dirname(__FILE__), '..')

  # Shared logger for the library, created on first use.
  #
  # A plain `logger = ...` assignment in the module body only creates a local
  # variable that disappears when the body ends, so the logger is exposed as a
  # module-level method instead.
  #
  # @return [Object] the logger returned by SemanticLogger['GithubBitbucketAudit']
  def self.logger
    @logger ||= SemanticLogger['GithubBitbucketAudit']
  end
end
8
+
9
+ require_relative './gh_bb_audit/version'
10
+ require_relative './gh_bb_audit/github_user'
11
+ require_relative './gh_bb_audit/github_repo'
12
+ require_relative './gh_bb_audit/github_scanner'
13
+ require_relative './gh_bb_audit/keywords_list'
14
+ require_relative './gh_bb_audit/keyword_matcher'
15
+ require_relative './gh_bb_audit/users_list'
16
+ require_relative './gh_bb_audit/output_writer'
@@ -0,0 +1,39 @@
module GhBbAudit
  # Lists the file paths of one GitHub repository by fetching the recursive
  # git tree of the first commit returned by the commits API (presumably the
  # most recent one -- confirm against the github_api documentation).
  class GithubRepo
    # @param user_name [String] owner of the repository
    # @param repo_name [String] name of the repository
    def initialize(user_name, repo_name)
      @user_name = user_name
      @repo_name = repo_name
    end

    # Returns the path of every entry in the repository tree. The result is
    # memoized after the first call.
    #
    # @return [Array] the paths; empty when the user or repo name is missing,
    #   when no commit or tree could be fetched, or when listing failed
    def get_all_file_paths
      return [] if !@user_name || !@repo_name
      @paths ||= fetch_paths
    end

    private

    # Collects the paths from the tree. A missing tree yields an empty list,
    # and failures are logged rather than swallowed silently. Only
    # StandardError is rescued so that interrupts and exit requests still
    # propagate.
    def fetch_paths
      (get_tree || []).map { |entry| entry.path }
    rescue StandardError => e
      logger.error("Could not list files for Repo:: #{@repo_name} for User:: #{@user_name}: #{e.message}")
      []
    end

    # Returns the entries of the repository tree, or nil when either the
    # commit or the tree could not be fetched.
    def get_tree
      last_commit_sha = latest_commit_sha
      unless last_commit_sha
        logger.info("Could not find any commit for Github Repo:: #{@repo_name} for User:: #{@user_name}")
        return nil
      end

      @github_tree ||= fetch_tree(last_commit_sha)
      unless @github_tree
        logger.info("Could not fetch tree for Repo:: #{@repo_name} for User:: #{@user_name}")
        return nil
      end

      @github_tree.tree
    end

    # SHA of the first commit in the commit listing, or nil when the listing
    # is empty or the request fails.
    def latest_commit_sha
      repo = ::Github::Repos.new user: @user_name, repo: @repo_name
      repo.commits.all[0].sha
    rescue StandardError
      nil
    end

    # Fetches the recursive tree for the given commit SHA, or nil on failure.
    def fetch_tree(commit_sha)
      ::Github.new.git_data.trees.get(@user_name, @repo_name, commit_sha, 'recursive' => true)
    rescue StandardError
      nil
    end

    # Logger used by this class. It is defined here because a bare `logger`
    # call does not otherwise resolve to anything inside this class.
    def logger
      @logger ||= ::SemanticLogger['GithubBitbucketAudit']
    end
  end
end
@@ -0,0 +1,32 @@
module GhBbAudit
  # Runs the audit: for every user in the user CSV, checks the file paths of
  # each public repository against the keywords in the keyword CSV and records
  # matching repositories in the output file.
  class GithubScanner
    # @param user_csv_path [String] path to the CSV file of usernames
    # @param keyword_csv_path [String] path to the CSV file of keywords
    # @param output_file_path [String] path of the report file to write
    def initialize(user_csv_path, keyword_csv_path, output_file_path)
      @user_csv_path = user_csv_path
      @keyword_csv_path = keyword_csv_path
      @output_file_path = output_file_path
    end

    # Scans every listed user. Any StandardError raised while scanning is
    # logged, and the output file is closed in all cases.
    def start_scan
      output_writer = ::GhBbAudit::OutputWriter.new(@output_file_path)
      begin
        matcher = ::GhBbAudit::KeywordMatcher.new(::GhBbAudit::KeywordsList.new(@keyword_csv_path).all_keywords)
        all_github_user = ::GhBbAudit::UsersList.new(@user_csv_path).all_users

        all_github_user.each { |user| scan_user(user, matcher, output_writer) }
      rescue StandardError => e
        logger.error "Error in scanning Github ", error: e.inspect
      ensure
        output_writer.close
      end
    end

    private

    # Writes a red-flag record for each public repo of +user+ whose file
    # paths match a keyword.
    def scan_user(user, matcher, output_writer)
      ::GhBbAudit::GithubUser.new(user).public_repos.each do |public_repo|
        file_paths = ::GhBbAudit::GithubRepo.new(user, public_repo.name).get_all_file_paths
        output_writer.write_red_flag_record(user, public_repo.name) if matcher.repo_contains_keyword?(file_paths)
      end
    end

    # Logger used by this class. Without this method the bare `logger` call in
    # the rescue branch raises NameError, replacing the error that was meant
    # to be logged.
    def logger
      @logger ||= ::SemanticLogger['GithubBitbucketAudit']
    end
  end
end
@@ -0,0 +1,11 @@
module GhBbAudit
  # Looks up the repositories listed for a single GitHub username.
  class GithubUser
    # @param user_name [String] GitHub username to look up
    def initialize(user_name)
      @user_name = user_name
    end

    # Returns the repository listing for the user, memoized after the first
    # successful lookup. Any StandardError raised by the lookup yields an
    # empty array instead.
    def public_repos
      return @public_repos if @public_repos

      @public_repos = begin
        ::Github.repos.list(user: @user_name)
      rescue StandardError
        []
      end
    end
  end
end
@@ -0,0 +1,12 @@
module GhBbAudit
  # Tests file paths against a fixed set of keywords.
  class KeywordMatcher
    # @param keywords [Array] keywords to search for; they are combined into
    #   a single pattern with Regexp.union
    def initialize(keywords)
      @keyword_regex = Regexp.union(keywords)
    end

    # @param repo_file_paths [Array] file paths of one repository
    # @return [Boolean] true when at least one path matches a keyword
    def repo_contains_keyword?(repo_file_paths)
      repo_file_paths.any? { |path| @keyword_regex.match(path) }
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'csv'
module GhBbAudit
  # Reads the keywords to search for from a CSV file.
  class KeywordsList
    # @param path_to_csv_file [String] path to the keywords CSV file
    def initialize(path_to_csv_file)
      @keywords_csv_file_path = path_to_csv_file
    end

    # Returns every distinct keyword in the file, memoized after the first
    # read. Cells from all rows and columns are merged into one flat list.
    #
    # Empty cells are parsed as nil and would make Regexp.union raise, so
    # they are dropped. Surrounding whitespace is stripped and blank keywords
    # are removed.
    #
    # @return [Array<String>] the distinct, non-empty keywords
    def all_keywords
      # Not rescuing here, as we should crash if we can not get keywords
      @keywords ||= ::CSV.read(@keywords_csv_file_path)
                         .flatten
                         .compact
                         .map(&:strip)
                         .reject(&:empty?)
                         .uniq
    end
  end
end
@@ -0,0 +1,15 @@
module GhBbAudit
  # Writes one line per flagged repository to a report file.
  class OutputWriter
    # Opens the report file in 'w+' mode.
    #
    # @param path_to_file [String] path of the report file
    def initialize(path_to_file)
      @report = File.new(path_to_file, 'w+')
    end

    # Writes one report line naming the flagged repository and its owner.
    def write_red_flag_record(user_name, repo_name)
      record = "Something fishy in REPO::#{repo_name} for USER:: #{user_name}"
      @report.puts(record)
    end

    # Closes the report file.
    def close
      @report.close
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'csv'
module GhBbAudit
  # Reads the GitHub usernames to scan from a CSV file.
  class UsersList
    # @param path_to_csv_file [String] path to the users CSV file
    def initialize(path_to_csv_file)
      @user_csv_file_path = path_to_csv_file
    end

    # Returns every distinct username in the file, memoized after the first
    # read. Cells from all rows and columns are merged into one flat list.
    #
    # Empty cells are parsed as nil, so they are dropped. Surrounding
    # whitespace is stripped and blank names are removed, which keeps nil or
    # padded usernames away from callers.
    #
    # @return [Array<String>] the distinct, non-empty usernames
    def all_users
      # Not rescuing here, as we should crash if we can not get userlist
      @users ||= ::CSV.read(@user_csv_file_path)
                      .flatten
                      .compact
                      .map(&:strip)
                      .reject(&:empty?)
                      .uniq
    end
  end
end
@@ -0,0 +1,3 @@
# Namespace of the github_bitbucket_audit gem.
module GhBbAudit
  # Version string of the library.
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: github_bitbucket_audit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ankur Maheshwari
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: github_api
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.11.3
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 0.11.3
25
+ prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: semantic_logger
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 2.7.0
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ~>
37
+ - !ruby/object:Gem::Version
38
+ version: 2.7.0
39
+ prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ prerelease: false
54
+ type: :development
55
+ description: The library takes a list of users and searches their public repos for specific keywords
56
+ email:
57
+ - amaheshwari@systango.com
58
+ executables:
59
+ - gh_bb_audit.rb
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - .gitignore
64
+ - Gemfile
65
+ - Gemfile.lock
66
+ - README.md
67
+ - bin/gh_bb_audit.rb
68
+ - gh_bb_audit.gemspec
69
+ - lib/gh_bb_audit.rb
70
+ - lib/gh_bb_audit/github_repo.rb
71
+ - lib/gh_bb_audit/github_scanner.rb
72
+ - lib/gh_bb_audit/github_user.rb
73
+ - lib/gh_bb_audit/keyword_matcher.rb
74
+ - lib/gh_bb_audit/keywords_list.rb
75
+ - lib/gh_bb_audit/output_writer.rb
76
+ - lib/gh_bb_audit/users_list.rb
77
+ - lib/gh_bb_audit/version.rb
78
+ homepage: http://systango.com/
79
+ licenses: []
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.2.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Looks for specific keywords in the public repos of github and bitbucket
101
+ test_files: []