github_bitbucket_audit 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +64 -0
- data/README.md +9 -0
- data/bin/gh_bb_audit.rb +24 -0
- data/gh_bb_audit.gemspec +25 -0
- data/lib/gh_bb_audit.rb +16 -0
- data/lib/gh_bb_audit/github_repo.rb +39 -0
- data/lib/gh_bb_audit/github_scanner.rb +32 -0
- data/lib/gh_bb_audit/github_user.rb +11 -0
- data/lib/gh_bb_audit/keyword_matcher.rb +12 -0
- data/lib/gh_bb_audit/keywords_list.rb +13 -0
- data/lib/gh_bb_audit/output_writer.rb +15 -0
- data/lib/gh_bb_audit/users_list.rb +13 -0
- data/lib/gh_bb_audit/version.rb +3 -0
- metadata +101 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 892bb28e0de26ae3d42697e84ff19131d6764915
|
|
4
|
+
data.tar.gz: af6a30b4030441d0c8e5b3b6cc064d87bc573618
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bf452e796efea3b991e457d1f9ff6e1841ef6ee8569b653732bd5275b8f310920fd22dba71754328346816be27e7dbc351b80f7ae6bda3a9f0c32df1299a7f46
|
|
7
|
+
data.tar.gz: f1a352c1eaa3977cc9a60b0a76e2fb0fbd9557491fb69f3c4fd9716f624d3a0a28f693745de5866d892d4135682480ab7b89ecafedb5307ec70deeb065228452
|
data/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
.rvmrc
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
github_bitbucket_audit (0.0.1)
|
|
5
|
+
github_api (~> 0.11.3)
|
|
6
|
+
semantic_logger (~> 2.7.0)
|
|
7
|
+
|
|
8
|
+
GEM
|
|
9
|
+
remote: http://rubygems.org/
|
|
10
|
+
specs:
|
|
11
|
+
addressable (2.3.5)
|
|
12
|
+
atomic (1.1.16-java)
|
|
13
|
+
coderay (1.1.0)
|
|
14
|
+
descendants_tracker (0.0.4)
|
|
15
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
16
|
+
faraday (0.9.0)
|
|
17
|
+
multipart-post (>= 1.2, < 3)
|
|
18
|
+
ffi (1.9.3-java)
|
|
19
|
+
github_api (0.11.3)
|
|
20
|
+
addressable (~> 2.3)
|
|
21
|
+
descendants_tracker (~> 0.0.1)
|
|
22
|
+
faraday (~> 0.8, < 0.10)
|
|
23
|
+
hashie (>= 1.2)
|
|
24
|
+
multi_json (>= 1.7.5, < 2.0)
|
|
25
|
+
nokogiri (~> 1.6.0)
|
|
26
|
+
oauth2
|
|
27
|
+
hashie (2.0.5)
|
|
28
|
+
jwt (0.1.11)
|
|
29
|
+
multi_json (>= 1.5)
|
|
30
|
+
method_source (0.8.2)
|
|
31
|
+
mini_portile (0.5.3)
|
|
32
|
+
multi_json (1.9.2)
|
|
33
|
+
multi_xml (0.5.5)
|
|
34
|
+
multipart-post (2.0.0)
|
|
35
|
+
nokogiri (1.6.1-java)
|
|
36
|
+
mini_portile (~> 0.5.0)
|
|
37
|
+
oauth2 (0.9.3)
|
|
38
|
+
faraday (>= 0.8, < 0.10)
|
|
39
|
+
jwt (~> 0.1.8)
|
|
40
|
+
multi_json (~> 1.3)
|
|
41
|
+
multi_xml (~> 0.5)
|
|
42
|
+
rack (~> 1.2)
|
|
43
|
+
pry (0.9.12.6-java)
|
|
44
|
+
coderay (~> 1.0)
|
|
45
|
+
method_source (~> 0.8)
|
|
46
|
+
slop (~> 3.4)
|
|
47
|
+
spoon (~> 0.0)
|
|
48
|
+
rack (1.5.2)
|
|
49
|
+
semantic_logger (2.7.0)
|
|
50
|
+
sync_attr (>= 1.0)
|
|
51
|
+
thread_safe (>= 0.1.0)
|
|
52
|
+
slop (3.5.0)
|
|
53
|
+
spoon (0.0.4)
|
|
54
|
+
ffi
|
|
55
|
+
sync_attr (1.0.0)
|
|
56
|
+
thread_safe (0.3.1-java)
|
|
57
|
+
atomic (>= 1.1.7, < 2)
|
|
58
|
+
|
|
59
|
+
PLATFORMS
|
|
60
|
+
java
|
|
61
|
+
|
|
62
|
+
DEPENDENCIES
|
|
63
|
+
github_bitbucket_audit!
|
|
64
|
+
pry
|
data/README.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
This gem scans GitHub accounts and checks their public repositories for certain keywords.
|
|
2
|
+
|
|
3
|
+
It takes a list of usernames from a CSV file.
|
|
4
|
+
Goes and fetches the public repos for those users.
|
|
5
|
+
Gets the tree for each of those public repos.
|
|
6
|
+
Checks the filenames against the list of keywords.
|
|
7
|
+
For repos that match keywords, it writes them down in an output file.
|
|
8
|
+
|
|
9
|
+
Usage: gh_bb_audit.rb -u <path_to_user_csv_file> -k <path_to_keywords_csv_file> -o <path_to_output_file>
|
data/bin/gh_bb_audit.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require_relative '../lib/gh_bb_audit'
|
|
4
|
+
require 'optparse'
|
|
5
|
+
|
|
6
|
+
# Command-line entry point: parse the three file-path options, then run the scan.
options = {}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: gh_bb_audit.rb -u <path_to_user_csv_file> -k <path_to_keywords_csv_file> -o <path_to_output_file>"

  # Every switch takes a mandatory PATH argument. The previous declarations
  # ("-users", "-keywords", "-output") were read by OptionParser as short
  # switches with odd argument placeholders, so they worked only by accident
  # and offered no real long form.
  opts.on("-u", "--users PATH", "CSV file listing the user names to scan") do |v|
    options[:user_file_path] = v
  end

  opts.on("-k", "--keywords PATH", "CSV file listing the keywords to look for") do |v|
    options[:keywords_file_path] = v
  end

  opts.on("-o", "--output PATH", "File the flagged repositories are written to") do |v|
    options[:output_file_path] = v
  end
end
parser.parse!

# Fail early with the usage text instead of handing nil paths to the scanner.
required_switches = {
  user_file_path: "-u/--users",
  keywords_file_path: "-k/--keywords",
  output_file_path: "-o/--output"
}
missing = required_switches.reject { |key, _switch| options[key] }.values
abort("Missing required option(s): #{missing.join(', ')}\n#{parser.help}") unless missing.empty?

::GhBbAudit::GithubScanner.new(
  options[:user_file_path],
  options[:keywords_file_path],
  options[:output_file_path]
).start_scan
|
data/gh_bb_audit.gemspec
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'gh_bb_audit/version'
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |spec|
|
|
7
|
+
spec.name = "github_bitbucket_audit"
|
|
8
|
+
spec.version = "0.0.1"
|
|
9
|
+
spec.summary = "Looks for specific keywords in the public repos of github and bitbucket"
|
|
10
|
+
spec.date = "2014-04-01"
|
|
11
|
+
spec.description = "The library takes a list of users and searches their public repos for specific keywords"
|
|
12
|
+
spec.authors = ["Ankur Maheshwari"]
|
|
13
|
+
spec.email = ["amaheshwari@systango.com"]
|
|
14
|
+
spec.homepage = "http://systango.com/"
|
|
15
|
+
|
|
16
|
+
spec.files = `git ls-files`.split($/)
|
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
19
|
+
spec.require_paths = ["lib"]
|
|
20
|
+
|
|
21
|
+
spec.add_dependency "github_api", "~> 0.11.3"
|
|
22
|
+
spec.add_dependency 'semantic_logger', '~> 2.7.0'
|
|
23
|
+
|
|
24
|
+
spec.add_development_dependency "pry"
|
|
25
|
+
end
|
data/lib/gh_bb_audit.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require 'semantic_logger' unless defined?(logger)
|
|
2
|
+
require 'github_api'
|
|
3
|
+
|
|
4
|
+
# Top-level namespace of the gem.
module GhBbAudit
  # Parent of the directory that contains this file.
  GEM_ROOT = File.join(File.dirname(__FILE__), '..')

  # Shared logger for the gem.
  #
  # The logger used to be assigned to a local variable inside the module
  # body. That variable is discarded as soon as the body finishes, so no
  # other code could reach it. It is now a memoized module-level reader.
  #
  # @return [Object] whatever SemanticLogger['GithubBitbucketAudit'] returns
  def self.logger
    @logger ||= SemanticLogger['GithubBitbucketAudit']
  end
end
|
|
8
|
+
|
|
9
|
+
require_relative './gh_bb_audit/version'
|
|
10
|
+
require_relative './gh_bb_audit/github_user'
|
|
11
|
+
require_relative './gh_bb_audit/github_repo'
|
|
12
|
+
require_relative './gh_bb_audit/github_scanner'
|
|
13
|
+
require_relative './gh_bb_audit/keywords_list'
|
|
14
|
+
require_relative './gh_bb_audit/keyword_matcher'
|
|
15
|
+
require_relative './gh_bb_audit/users_list'
|
|
16
|
+
require_relative './gh_bb_audit/output_writer'
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
module GhBbAudit
  # Lists the file paths of one GitHub repository by fetching the recursive
  # git tree at the first commit returned by the commits listing
  # (presumably the most recent one; confirm against the github_api docs).
  class GithubRepo
    # @param user_name [String, nil] owner of the repository
    # @param repo_name [String, nil] name of the repository
    def initialize(user_name, repo_name)
      @user_name = user_name
      @repo_name = repo_name
    end

    # Returns the path of every entry in the repository tree.
    #
    # The result is memoized. Any failure while fetching yields an empty
    # list, so a single bad repository does not abort a scan.
    #
    # @return [Array<String>] file paths, or [] when either name is missing
    #   or the tree could not be fetched
    def get_all_file_paths
      return [] unless @user_name && @repo_name

      @paths ||= begin
        # get_tree returns nil when there is no commit or no tree.
        (get_tree || []).map { |entry| entry.path }
      rescue StandardError => e
        # StandardError only: rescuing Exception would also swallow
        # Interrupt and SystemExit.
        logger.error "Could not list files for Repo:: #{@repo_name} for User:: #{@user_name}", error: e.inspect
        []
      end
    end

    private

    # Fetches the tree entries for the repository.
    #
    # @return [Array, nil] the tree entries, or nil when no commit or tree
    #   could be found
    def get_tree
      last_commit_sha = latest_commit_sha
      unless last_commit_sha
        logger.info("Could not find any commit for Github Repo:: #{@repo_name} for User:: #{@user_name}")
        return nil
      end

      @github_tree ||= fetch_tree(last_commit_sha)
      unless @github_tree
        logger.info("Could not fetch tree for Repo:: #{@repo_name} for User:: #{@user_name}")
        return nil
      end

      @github_tree.tree
    end

    # SHA of the first commit in the listing, or nil when it cannot be read
    # (for example, a repository without commits).
    def latest_commit_sha
      repo = ::Github::Repos.new user: @user_name, repo: @repo_name
      repo.commits.all[0].sha
    rescue StandardError
      nil
    end

    # Recursive tree for the given commit SHA, or nil when the request fails.
    def fetch_tree(commit_sha)
      ::Github.new.git_data.trees.get @user_name, @repo_name, commit_sha, 'recursive' => true
    rescue StandardError
      nil
    end

    # Logger for this class. A bare `logger` call was previously unresolved
    # here because nothing in the class defined it.
    def logger
      @logger ||= ::SemanticLogger['GithubBitbucketAudit']
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module GhBbAudit
  # Runs the audit: for every user in the user CSV, checks each public
  # repository's file paths against the keyword list and records the
  # repositories that match.
  class GithubScanner
    # @param user_csv_path [String] CSV file with the user names to scan
    # @param keyword_csv_path [String] CSV file with the keywords to look for
    # @param output_file_path [String] file the flagged repositories go to
    def initialize(user_csv_path, keyword_csv_path, output_file_path)
      @user_csv_path = user_csv_path
      @keyword_csv_path = keyword_csv_path
      @output_file_path = output_file_path
    end

    # Scans every user and writes one record per repository whose file
    # paths match a keyword. Errors raised during the scan are logged, not
    # re-raised; the output file is closed in every case.
    def start_scan
      output_writer = ::GhBbAudit::OutputWriter.new(@output_file_path)
      begin
        keywords = ::GhBbAudit::KeywordsList.new(@keyword_csv_path).all_keywords
        matcher = ::GhBbAudit::KeywordMatcher.new(keywords)
        all_github_user = ::GhBbAudit::UsersList.new(@user_csv_path).all_users

        all_github_user.each do |user|
          ::GhBbAudit::GithubUser.new(user).public_repos.each do |public_repo|
            file_paths = ::GhBbAudit::GithubRepo.new(user, public_repo.name).get_all_file_paths
            next unless matcher.repo_contains_keyword?(file_paths)

            output_writer.write_red_flag_record(user, public_repo.name)
          end
        end
      rescue StandardError => e
        logger.error "Error in scanning Github ", error: e.inspect
      ensure
        output_writer.close
      end
    end

    private

    # Logger for this class. A bare `logger` call was previously unresolved
    # here, so the rescue handler above raised NameError and hid the
    # original error.
    def logger
      @logger ||= ::SemanticLogger['GithubBitbucketAudit']
    end
  end
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
module GhBbAudit
  # Tests repository file paths against a fixed set of keywords.
  class KeywordMatcher
    # Builds one regular expression matching any of the keywords. String
    # keywords are matched literally (Regexp.union escapes them).
    #
    # nil and empty-string keywords are dropped: nil makes Regexp.union
    # raise, and an empty string would match every path. With no usable
    # keyword the resulting pattern never matches.
    #
    # @param keywords [Array<String, Regexp>] keywords to look for
    def initialize(keywords)
      usable = Array(keywords).reject do |keyword|
        keyword.nil? || (keyword.respond_to?(:empty?) && keyword.empty?)
      end
      @keyword_regex = Regexp.union(usable)
    end

    # @param repo_file_paths [Array<String>, nil] file paths of a repository
    # @return [Boolean] true when at least one path contains a keyword
    def repo_contains_keyword?(repo_file_paths)
      Array(repo_file_paths).any? { |path| @keyword_regex.match(path) }
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
module GhBbAudit
  # Reads the keywords to search for from a CSV file.
  class KeywordsList
    # @param path_to_csv_file [String] path of the keywords CSV file
    def initialize(path_to_csv_file)
      @keywords_csv_file_path = path_to_csv_file
    end

    # Every distinct keyword in the file, across all rows and columns.
    #
    # Blank cells (read by CSV as nil or "") are dropped and surrounding
    # whitespace is stripped, so input such as "a, b," yields ["a", "b"].
    # The result is memoized.
    #
    # @return [Array<String>] unique, non-empty keywords
    # @raise [SystemCallError] when the file cannot be read
    def all_keywords
      # Not rescuing here, as we should crash if we can not get keywords
      @keywords ||= ::CSV.read(@keywords_csv_file_path)
                         .flatten
                         .compact
                         .map { |cell| cell.strip }
                         .reject { |cell| cell.empty? }
                         .uniq
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module GhBbAudit
  # Writes one line per flagged repository to the output file.
  class OutputWriter
    # Opens the output file, truncating any existing content.
    #
    # @param path_to_file [String] path of the output file
    def initialize(path_to_file)
      @fhandle = File.open(path_to_file, 'w+')
    end

    # Appends a record for a repository that matched a keyword.
    #
    # @param user_name [String] owner of the repository
    # @param repo_name [String] name of the repository
    def write_red_flag_record(user_name, repo_name)
      @fhandle.puts("Something fishy in REPO::#{repo_name} for USER:: #{user_name}")
    end

    # Closes the output file. Safe to call more than once: closing an
    # already closed IO raises IOError on Ruby versions before 2.3.
    def close
      @fhandle.close unless @fhandle.closed?
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
module GhBbAudit
  # Reads the user names to scan from a CSV file.
  class UsersList
    # @param path_to_csv_file [String] path of the users CSV file
    def initialize(path_to_csv_file)
      @user_csv_file_path = path_to_csv_file
    end

    # Every distinct user name in the file, across all rows and columns.
    #
    # Blank cells (read by CSV as nil or "") are dropped and surrounding
    # whitespace is stripped, so a trailing comma or padded value does not
    # produce a nil or malformed user name. The result is memoized.
    #
    # @return [Array<String>] unique, non-empty user names
    # @raise [SystemCallError] when the file cannot be read
    def all_users
      # Not rescuing here, as we should crash if we can not get userlist
      @users ||= ::CSV.read(@user_csv_file_path)
                      .flatten
                      .compact
                      .map { |cell| cell.strip }
                      .reject { |cell| cell.empty? }
                      .uniq
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: github_bitbucket_audit
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Ankur Maheshwari
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-04-01 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: github_api
|
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ~>
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.11.3
|
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
|
21
|
+
requirements:
|
|
22
|
+
- - ~>
|
|
23
|
+
- !ruby/object:Gem::Version
|
|
24
|
+
version: 0.11.3
|
|
25
|
+
prerelease: false
|
|
26
|
+
type: :runtime
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: semantic_logger
|
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ~>
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 2.7.0
|
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - ~>
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: 2.7.0
|
|
39
|
+
prerelease: false
|
|
40
|
+
type: :runtime
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: pry
|
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - '>='
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - '>='
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '0'
|
|
53
|
+
prerelease: false
|
|
54
|
+
type: :development
|
|
55
|
+
description: The library takes a list of users and searches their public repos for specific keywords
|
|
56
|
+
email:
|
|
57
|
+
- amaheshwari@systango.com
|
|
58
|
+
executables:
|
|
59
|
+
- gh_bb_audit.rb
|
|
60
|
+
extensions: []
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- .gitignore
|
|
64
|
+
- Gemfile
|
|
65
|
+
- Gemfile.lock
|
|
66
|
+
- README.md
|
|
67
|
+
- bin/gh_bb_audit.rb
|
|
68
|
+
- gh_bb_audit.gemspec
|
|
69
|
+
- lib/gh_bb_audit.rb
|
|
70
|
+
- lib/gh_bb_audit/github_repo.rb
|
|
71
|
+
- lib/gh_bb_audit/github_scanner.rb
|
|
72
|
+
- lib/gh_bb_audit/github_user.rb
|
|
73
|
+
- lib/gh_bb_audit/keyword_matcher.rb
|
|
74
|
+
- lib/gh_bb_audit/keywords_list.rb
|
|
75
|
+
- lib/gh_bb_audit/output_writer.rb
|
|
76
|
+
- lib/gh_bb_audit/users_list.rb
|
|
77
|
+
- lib/gh_bb_audit/version.rb
|
|
78
|
+
homepage: http://systango.com/
|
|
79
|
+
licenses: []
|
|
80
|
+
metadata: {}
|
|
81
|
+
post_install_message:
|
|
82
|
+
rdoc_options: []
|
|
83
|
+
require_paths:
|
|
84
|
+
- lib
|
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - '>='
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - '>='
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '0'
|
|
95
|
+
requirements: []
|
|
96
|
+
rubyforge_project:
|
|
97
|
+
rubygems_version: 2.2.2
|
|
98
|
+
signing_key:
|
|
99
|
+
specification_version: 4
|
|
100
|
+
summary: Looks for specific keywords in the public repos of github and bitbucket
|
|
101
|
+
test_files: []
|