comment_extractor 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +132 -0
  4. data/bin/comment_parser_debug +45 -0
  5. data/lib/comment_extractor/code_object/comment.rb +19 -0
  6. data/lib/comment_extractor/code_object.rb +12 -0
  7. data/lib/comment_extractor/code_objects.rb +46 -0
  8. data/lib/comment_extractor/configuration.rb +50 -0
  9. data/lib/comment_extractor/encoding.rb +40 -0
  10. data/lib/comment_extractor/extractor/c.rb +8 -0
  11. data/lib/comment_extractor/extractor/cc.rb +8 -0
  12. data/lib/comment_extractor/extractor/class.rb +8 -0
  13. data/lib/comment_extractor/extractor/clojure.rb +11 -0
  14. data/lib/comment_extractor/extractor/coffee.rb +13 -0
  15. data/lib/comment_extractor/extractor/concerns/simple_extractor.rb +189 -0
  16. data/lib/comment_extractor/extractor/concerns/slash_extractor.rb +16 -0
  17. data/lib/comment_extractor/extractor/cpp.rb +8 -0
  18. data/lib/comment_extractor/extractor/cs.rb +8 -0
  19. data/lib/comment_extractor/extractor/css.rb +8 -0
  20. data/lib/comment_extractor/extractor/cxx.rb +8 -0
  21. data/lib/comment_extractor/extractor/d.rb +9 -0
  22. data/lib/comment_extractor/extractor/erlang.rb +12 -0
  23. data/lib/comment_extractor/extractor/fortran.rb +11 -0
  24. data/lib/comment_extractor/extractor/go.rb +8 -0
  25. data/lib/comment_extractor/extractor/h.rb +8 -0
  26. data/lib/comment_extractor/extractor/haml.rb +49 -0
  27. data/lib/comment_extractor/extractor/haskell.rb +12 -0
  28. data/lib/comment_extractor/extractor/hpp.rb +8 -0
  29. data/lib/comment_extractor/extractor/html.rb +13 -0
  30. data/lib/comment_extractor/extractor/java.rb +8 -0
  31. data/lib/comment_extractor/extractor/java_script.rb +12 -0
  32. data/lib/comment_extractor/extractor/lisp.rb +11 -0
  33. data/lib/comment_extractor/extractor/lua.rb +12 -0
  34. data/lib/comment_extractor/extractor/m.rb +9 -0
  35. data/lib/comment_extractor/extractor/markdown.rb +7 -0
  36. data/lib/comment_extractor/extractor/mm.rb +8 -0
  37. data/lib/comment_extractor/extractor/perl.rb +12 -0
  38. data/lib/comment_extractor/extractor/php.rb +8 -0
  39. data/lib/comment_extractor/extractor/python.rb +13 -0
  40. data/lib/comment_extractor/extractor/ruby.rb +40 -0
  41. data/lib/comment_extractor/extractor/sass.rb +8 -0
  42. data/lib/comment_extractor/extractor/scala.rb +8 -0
  43. data/lib/comment_extractor/extractor/scss.rb +8 -0
  44. data/lib/comment_extractor/extractor/shell.rb +11 -0
  45. data/lib/comment_extractor/extractor/sqf.rb +8 -0
  46. data/lib/comment_extractor/extractor/sql.rb +12 -0
  47. data/lib/comment_extractor/extractor/sqs.rb +7 -0
  48. data/lib/comment_extractor/extractor/tex.rb +12 -0
  49. data/lib/comment_extractor/extractor/text.rb +10 -0
  50. data/lib/comment_extractor/extractor/yaml.rb +12 -0
  51. data/lib/comment_extractor/extractor.rb +96 -0
  52. data/lib/comment_extractor/extractor_manager.rb +158 -0
  53. data/lib/comment_extractor/file.rb +42 -0
  54. data/lib/comment_extractor/parser.rb +33 -0
  55. data/lib/comment_extractor/smart_string_scanner.rb +11 -0
  56. data/lib/comment_extractor/version.rb +4 -0
  57. data/lib/comment_extractor.rb +18 -0
  58. data/spec/assets/binary_file +0 -0
  59. data/spec/assets/shebang_file +3 -0
  60. data/spec/assets/source_code/c.c +158 -0
  61. data/spec/assets/source_code/cc.cc +24 -0
  62. data/spec/assets/source_code/class +0 -0
  63. data/spec/assets/source_code/clojure.clj +41 -0
  64. data/spec/assets/source_code/coffee.coffee +27 -0
  65. data/spec/assets/source_code/cpp.cpp +130 -0
  66. data/spec/assets/source_code/cs.cs +53 -0
  67. data/spec/assets/source_code/css.css +37 -0
  68. data/spec/assets/source_code/cxx +0 -0
  69. data/spec/assets/source_code/d.d +110 -0
  70. data/spec/assets/source_code/erlang.es +34 -0
  71. data/spec/assets/source_code/fortran.f +41 -0
  72. data/spec/assets/source_code/golang.go +61 -0
  73. data/spec/assets/source_code/h +0 -0
  74. data/spec/assets/source_code/haml.haml +26 -0
  75. data/spec/assets/source_code/haskell.hs +36 -0
  76. data/spec/assets/source_code/hpp +0 -0
  77. data/spec/assets/source_code/html.html +139 -0
  78. data/spec/assets/source_code/java.java +39 -0
  79. data/spec/assets/source_code/java_script.js +164 -0
  80. data/spec/assets/source_code/lisp.el +18 -0
  81. data/spec/assets/source_code/lua.lua +34 -0
  82. data/spec/assets/source_code/m +0 -0
  83. data/spec/assets/source_code/mm +0 -0
  84. data/spec/assets/source_code/perl.pl +36 -0
  85. data/spec/assets/source_code/php.php +31 -0
  86. data/spec/assets/source_code/python.py +139 -0
  87. data/spec/assets/source_code/ruby.rb +36 -0
  88. data/spec/assets/source_code/sass.sass +77 -0
  89. data/spec/assets/source_code/scala.scala +46 -0
  90. data/spec/assets/source_code/scss.scss +93 -0
  91. data/spec/assets/source_code/shell.sh +5 -0
  92. data/spec/assets/source_code/sqf +0 -0
  93. data/spec/assets/source_code/sql.sql +11 -0
  94. data/spec/assets/source_code/sqs +0 -0
  95. data/spec/assets/source_code/tex.tex +20 -0
  96. data/spec/assets/source_code/text.txt +15 -0
  97. data/spec/assets/source_code/vim +17 -0
  98. data/spec/assets/source_code/yaml.yml +44 -0
  99. data/spec/assets/stripper/children/children +0 -0
  100. data/spec/assets/stripper/children/children.c +0 -0
  101. data/spec/assets/stripper/children/children.js +0 -0
  102. data/spec/assets/stripper/children/children.o +0 -0
  103. data/spec/assets/stripper/children/children.rb +1 -0
  104. data/spec/assets/stripper/test +0 -0
  105. data/spec/assets/stripper/test.c +0 -0
  106. data/spec/assets/stripper/test.js +0 -0
  107. data/spec/assets/stripper/test.o +0 -0
  108. data/spec/assets/stripper/test.rb +1 -0
  109. data/spec/comment_extractor/code_object/comment_spec.rb +15 -0
  110. data/spec/comment_extractor/code_object_spec.rb +18 -0
  111. data/spec/comment_extractor/code_objects_spec.rb +66 -0
  112. data/spec/comment_extractor/configuration_spec.rb +68 -0
  113. data/spec/comment_extractor/encoding_spec.rb +77 -0
  114. data/spec/comment_extractor/extractor/c_spec.rb +9 -0
  115. data/spec/comment_extractor/extractor/cc_spec.rb +9 -0
  116. data/spec/comment_extractor/extractor/class_spec.rb +9 -0
  117. data/spec/comment_extractor/extractor/clojure_spec.rb +9 -0
  118. data/spec/comment_extractor/extractor/coffee_spec.rb +9 -0
  119. data/spec/comment_extractor/extractor/cpp_spec.rb +9 -0
  120. data/spec/comment_extractor/extractor/cs_spec.rb +9 -0
  121. data/spec/comment_extractor/extractor/css_spec.rb +9 -0
  122. data/spec/comment_extractor/extractor/cxx_spec.rb +9 -0
  123. data/spec/comment_extractor/extractor/d_spec.rb +10 -0
  124. data/spec/comment_extractor/extractor/erlang_spec.rb +10 -0
  125. data/spec/comment_extractor/extractor/fortran_spec.rb +9 -0
  126. data/spec/comment_extractor/extractor/go_spec.rb +9 -0
  127. data/spec/comment_extractor/extractor/h_spec.rb +9 -0
  128. data/spec/comment_extractor/extractor/haml_spec.rb +9 -0
  129. data/spec/comment_extractor/extractor/haskell_spec.rb +9 -0
  130. data/spec/comment_extractor/extractor/hpp_spec.rb +9 -0
  131. data/spec/comment_extractor/extractor/html_spec.rb +9 -0
  132. data/spec/comment_extractor/extractor/java_script_spec.rb +10 -0
  133. data/spec/comment_extractor/extractor/java_spec.rb +9 -0
  134. data/spec/comment_extractor/extractor/lisp_spec.rb +9 -0
  135. data/spec/comment_extractor/extractor/lua_spec.rb +9 -0
  136. data/spec/comment_extractor/extractor/m_spec.rb +9 -0
  137. data/spec/comment_extractor/extractor/markdown_spec.rb +8 -0
  138. data/spec/comment_extractor/extractor/mm_spec.rb +9 -0
  139. data/spec/comment_extractor/extractor/perl_spec.rb +9 -0
  140. data/spec/comment_extractor/extractor/php_spec.rb +9 -0
  141. data/spec/comment_extractor/extractor/python_spec.rb +9 -0
  142. data/spec/comment_extractor/extractor/ruby_spec.rb +12 -0
  143. data/spec/comment_extractor/extractor/sass_spec.rb +9 -0
  144. data/spec/comment_extractor/extractor/scala_spec.rb +9 -0
  145. data/spec/comment_extractor/extractor/scss_spec.rb +9 -0
  146. data/spec/comment_extractor/extractor/shell_spec.rb +9 -0
  147. data/spec/comment_extractor/extractor/sqf_spec.rb +9 -0
  148. data/spec/comment_extractor/extractor/sql_spec.rb +9 -0
  149. data/spec/comment_extractor/extractor/sqs_spec.rb +9 -0
  150. data/spec/comment_extractor/extractor/tex_spec.rb +9 -0
  151. data/spec/comment_extractor/extractor/text_spec.rb +7 -0
  152. data/spec/comment_extractor/extractor/yaml_spec.rb +9 -0
  153. data/spec/comment_extractor/extractor_manager_spec.rb +233 -0
  154. data/spec/comment_extractor/extractor_spec.rb +102 -0
  155. data/spec/comment_extractor/file_spec.rb +100 -0
  156. data/spec/comment_extractor/parser_spec.rb +67 -0
  157. data/spec/comment_extractor/smart_string_scanner_spec.rb +24 -0
  158. data/spec/comment_extractor/version_spec.rb +8 -0
  159. data/spec/comment_extractor_spec.rb +15 -0
  160. data/spec/spec_helper.rb +22 -0
  161. data/spec/support/rspec/comment_extractor/extractor_example_group.rb +115 -0
  162. data/spec/support/rspec/comment_extractor/matchers/extract_comment.rb +58 -0
  163. data/spec/support/rspec/comment_extractor/matchers.rb +7 -0
  164. data/spec/support/rspec/comment_extractor.rb +6 -0
  165. metadata +370 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 81a66d3881e6e790f7b4fbd5626661f6b6b7ebce
4
+ data.tar.gz: eef85195b570687ca4e6eafbbe841de336884233
5
+ SHA512:
6
+ metadata.gz: 918fd7799b29841b8d1ca3a23329ba8c59f48635806dfb29b68e993ec85f20150fe33170db4eb3c228b2d7326503ac76726fca9bbe06f364eac489e81c84d0d2
7
+ data.tar.gz: ee4337e67ac7c371d0cad3b83991b379b9d0dcc97e4c709935750b82fc277b75107d4874272876c9954a30dc893f7be16b901e93ca5a6242afe0610d5b11cfe6
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) <2014> <Ishii Hiroyuki>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,132 @@
1
+ # comment\_extractor
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/comment_extractor.png)](http://badge.fury.io/rb/comment\_extractor)
4
+ [![Build Status](https://travis-ci.org/alpaca-tc/comment_extractor.png?branch=v1.0.0)](https://travis-ci.org/alpaca-tc/comment\_parser)
5
+ [![Coverage Status](https://coveralls.io/repos/alpaca-tc/comment_extractor/badge.png?branch=v1.0.0)](https://coveralls.io/r/alpaca-tc/comment\_extractor?branch=v1.0.0)
6
+
7
+ ## Description
8
+
9
+ comment\_extractor extracts the comments from source code.
10
+
11
+ ## Installation
12
+
13
+ CommentExtractor has been tested with ruby 2.1.
14
+
15
+ ```sh
16
+ git clone https://github.com/alpaca-tc/comment_extractor
17
+ cd comment_extractor
18
+ rake install
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ### Parser
24
+
25
+ Given a file path, `Parser.for` finds a matching Extractor and returns a Parser instance initialized with that extractor. Use the returned parser to get the comments from the file.
26
+
27
+ ```ruby
28
+ require 'comment_extractor'
29
+
30
+ path = 'path/to/file'
31
+ if parser = CommentExtractor::Parser.for(path)
32
+ comments = parser.parse
33
+ comments.is_a?(CommentExtractor::CodeObjects)
34
+
35
+ comment = comments.first
36
+ comment.file #=> 'path/to/file'
37
+ comment.line #=> 1
38
+ comment.value #=> 'I am a comment'
39
+ end
40
+ ```
41
+
42
+ ### Extractor
43
+
44
+ #### You can use Extractor directly.
45
+
46
+ ```ruby
47
+ require 'comment_extractor'
48
+
49
+ file_path = 'path/to/file.rb'
50
+ manager = CommentExtractor::ExtractorManager
51
+ if extractor = manager.can_extract(file_path)
52
+ content = File.read(file_path)
53
+ comments = extractor.new(content).extract_comments
54
+ comments.is_a?(CommentExtractor::CodeObjects)
55
+ end
56
+
57
+ # Other way to find extractor
58
+ extractor = manager.find_extractor_by_shebang('#! /usr/local/bin/ruby')
59
+ extractor = manager.find_extractor_by_filename('path/to/file.rb')
60
+ extractor = manager.find_extractor_by_filetype('ruby')
61
+ ```
62
+
63
+ #### How to use extractor of specific filetype.
64
+
65
+ ```ruby
66
+ require 'comment_extractor/extractor/d'
67
+
68
+ content = File.read('path/to/file.d')
69
+ comments = CommentExtractor::Extractor::D.new(content).extract_comments
70
+ ```
71
+
72
+ ### Supported FileTypes
73
+
74
+ - **Bash / Zsh**
75
+ - **C / C++**
76
+ - **Class**
77
+ - **C#**
78
+ - **Clojure**
79
+ - **Coffee-Script**
80
+ - **D**
81
+ - **EmacsLisp**
82
+ - **Erlang**
83
+ - **Fortran**
84
+ - **Go**
85
+ - **Haml**
86
+ - **Haskell**
87
+ - **HTML**
88
+ - **Java**
89
+ - **JavaScript**
90
+ - **Tex**
91
+ - **Lua**
92
+ - **PHP**
93
+ - **Perl**
94
+ - **Python**
95
+ - **Ruby**
96
+ - **SASS**
97
+ - **SCSS**
98
+ - **SQF**
99
+ - **SQL**
100
+ - **Scala**
101
+
102
+ ### TODO
103
+
104
+ - Markdown
105
+ - SQS; I cannot implement it because I do not know the syntax of SQS.
106
+
107
+ ### Create a new Extractor
108
+
109
+ If a file type you need is missing from the supported list, please either file an issue or submit a pull request :)
110
+ I would also be glad if you could send me sample source code for the new file type via an issue.
111
+
112
+ ```ruby
113
+ # lib/comment_extractor/extractor/file_type.rb
114
+ require 'comment_extractor/extractor'
115
+
116
+ class CommentExtractor::Extractor::FileType < CommentExtractor::Extractor
117
+ include CommentExtractor::Extractor::Concerns::SimpleExtractor
118
+
119
+ shebang /ruby$/ # (Optional)
120
+ filename /\.(extension)$/ # (Required)
121
+ filetype 'filetype' # (Required) file type name, e.g. 'ruby', 'python'
122
+
123
+ # define_ignore_patterns(*given regexp)
124
+
125
+ # define_bracket('"') #=> define_ignore_patterns(/".*?(?<!\\)"/)
126
+ # define_regexp_bracket #=> define_ignore_patterns(%r!/(?=[^/])!, /(?<!\\)\//)
127
+
128
+ # define the rule of comment
129
+ comment start_with: /;+/
130
+ comment start_with: /;--/, end_with: /--\|/, type: BLOCK_COMMENT
131
+ end
132
+ ```
@@ -0,0 +1,45 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ $:.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'comment_extractor'
5
+
6
# Debug helper that runs the comment parser over one file, or over every
# regular file below a directory, and lists the files for which
# CommentExtractor::Parser.for returned no parser.
class CommentExtractor::Debugger
  # file_path - path to a single file or to a directory that is searched
  #             recursively.
  def initialize(file_path)
    @files = if File.file?(file_path)
               [file_path]
             elsif File.directory?(file_path)
               Dir["#{file_path}/**/*"].select { |f| File.file?(f) }
             else
               # Neither a file nor a directory (e.g. a path that does not
               # exist): keep an empty list so #parse_all_files does not call
               # #each on nil.
               []
             end
    # Report buckets; every key lazily starts out as its own empty array.
    @debug = Hash.new { |h, k| h[k] = [] }
  end

  # Parses every collected file, then prints the paths that had no parser.
  def parse_all_files
    @files.each { |file| parse_file(file) }

    puts "Parser is not found"
    puts @debug[:parser_not_found].join("\n")
  end

  # Parses a single file and prints which extractor class was used. Files
  # without a parser are remembered for the final report.
  def parse_file(file_path)
    puts "Open: #{file_path}"

    if (parser = CommentExtractor::Parser.for(file_path))
      puts "Use: #{parser.extractor.class}"
      parser.extract_comments
    else
      @debug[:parser_not_found] << file_path
    end
  end
end
36
+
37
# Turn the default extractor off for the debug run (presumably so that files
# without a dedicated extractor show up in the "not found" report).
CommentExtractor.configure { |config| config.use_default_extractor = false }

# The first command line argument is the file or directory to inspect;
# without it there is nothing to do.
target_path = ARGV.first
exit if target_path.nil?

CommentExtractor::Debugger.new(target_path).parse_all_files
@@ -0,0 +1,19 @@
1
+ require 'comment_extractor/code_object'
2
+
3
module CommentExtractor
  class CodeObject
    # A code object that additionally remembers the line it was found on.
    class Comment < CommentExtractor::CodeObject
      attr_accessor :line

      # Kinds of comments a comment object can be tagged with.
      module Type
        ONE_LINER_COMMENT = :one_liner_comment
        BLOCK_COMMENT = :block_comment
      end

      # line   - line number of the comment (nil when not given).
      # values - remaining keyword arguments, forwarded to CodeObject.
      #
      # The default used to be written as `line: line`, a circular argument
      # reference that only evaluated to nil by accident; nil is now spelled
      # out explicitly.
      def initialize(line: nil, **values)
        super(**values)
        @line = line
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module CommentExtractor
  # Base object for something extracted from source code: a value plus a
  # free-form metadata hash.
  class CodeObject
    attr_accessor :value, :metadata

    # value  - the extracted payload (nil when omitted).
    # others - every other keyword argument; stored as the metadata hash.
    def initialize(value: nil, **others)
      @metadata = others
      @value = value
    end
  end
end
11
+
12
+ require 'comment_extractor/code_object/comment'
@@ -0,0 +1,46 @@
1
+ require 'delegate'
2
+
3
module CommentExtractor
  # Array-like collection (delegating to an Array) that only accepts
  # CodeObject instances and records itself as the :parent in the metadata
  # of every object added through #<<, #push or #concat.
  class CodeObjects < DelegateClass(Array)
    attr_accessor :file

    # file - value exposed through #file (nil when omitted).
    def initialize(file: nil)
      @file = file
      super([])
    end

    # Appends one code object. Raises TypeError for anything else.
    def <<(code_object)
      super(initialize_code_object(code_object))
    end

    # Appends every given code object. Raises TypeError for anything else.
    def push(*code_object_array)
      super(*code_object_array.map { |v| initialize_code_object(v) })
    end

    # Appends the code objects of all given (possibly nested) arrays.
    # Raises TypeError when an element is not a CodeObject.
    def concat(*code_object_arrays)
      super(code_object_arrays.flatten.map { |v| initialize_code_object(v) })
    end

    # Short description listing the instance variables of the collection.
    def inspect
      # instance_variables already yields names with the leading "@", so no
      # extra "@" is added here (it used to render as "@@file=...").
      attributes = instance_variables.map { |v| "#{v}=#{instance_variable_get(v)}" }
      attributes = attributes.empty? ? '' : " #{attributes.join(', ')}"
      object_id = '0x%x' % (self.object_id << 1)
      "#<#{self.class}:#{object_id}#{attributes}>"
    end
    alias :to_s :inspect

    private

    # Type-checks the object and links it back to this collection.
    def initialize_code_object(code_object)
      unless code_object.is_a?(CodeObject)
        message = "no implicit conversion of #{code_object.class} into #{CodeObject}"
        raise TypeError, message
      end

      code_object.metadata[:parent] = self
      code_object
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'comment_extractor/extractor_manager'
2
+ require 'comment_extractor/extractor/text'
3
+
4
module CommentExtractor
  # Settings object of the library. Every setting declared with
  # `add_setting` gets a reader and a writer.
  class Configuration
    # Names of settings that must be truthy after initialization.
    @@required_attributes = {}

    # attributes - Hash of setting name => value. Keys without a matching
    #              writer are ignored.
    #
    # Raises RuntimeError when a required setting ends up falsy.
    def initialize(attributes = {})
      # Apply the defaults first. They used to be assigned last, which
      # silently discarded every value passed in through `attributes`.
      self.extractors = ExtractorManager.default_extractors
      self.default_extractor = Extractor::Text
      self.use_default_extractor = true

      attributes.each do |key, value|
        method_name = "#{key}="
        send(method_name, value) if respond_to?(method_name)
      end

      @@required_attributes.each_key do |key|
        raise "Unable to initialize #{key} without attribute" unless send(key)
      end
    end

    # Declares a setting.
    #
    # name - Symbol name of the setting.
    # opts - :predicate => true additionally defines `name?`,
    #        :required  => true registers the setting as required.
    #        Both keys are removed from the given hash.
    def self.add_setting(name, opts = {})
      attr_accessor name

      define_predicate_for(name) if opts.delete(:predicate)
      define_required_attribute(name) if opts.delete(:required)
    end

    # NOTE: the two helpers below were preceded by a bare `private`, which has
    # no effect on `def self.` methods. They stay public as before.

    # Registers the given setting names as required.
    def self.define_required_attribute(*names)
      names.each { |name| @@required_attributes[name] = nil }
    end

    # Defines a `name?` method returning the setting coerced to true/false.
    def self.define_predicate_for(*names)
      names.each do |name|
        define_method("#{name}?") { !!send(name) }
      end
    end

    add_setting :extractors
    add_setting :default_extractor
    add_setting :use_default_extractor
  end
end
@@ -0,0 +1,40 @@
1
module CommentExtractor
  # Helpers that read file content and bring it into a target encoding.
  module Encoding
    # UTF-8 byte order mark as raw bytes. It is compared byte-wise so the
    # check also works on binary strings (a UTF-8 regexp raises
    # Encoding::CompatibilityError on binary content with high bytes).
    UTF8_BOM = "\xEF\xBB\xBF".b.freeze

    # RUBY_PLATFORM fragments that identify Windows builds.
    WINDOWS_PLATFORMS = /mingw|mswin/

    # Reads the file in binary mode and converts the content.
    #
    # file_path - path of the file to read.
    # encoding  - target encoding (defaults to Encoding.default_external).
    #
    # Returns the converted String. See .encode for the errors raised.
    def self.read_file(file_path, encoding = ::Encoding.default_external)
      content = File.open(file_path, 'rb') { |f| f.read }
      # Pass the requested encoding on; it used to be dropped here.
      encode(content, encoding)
    end

    # Converts content to the target encoding. The given string is modified
    # in place and also returned.
    #
    # content  - String to convert.
    # encoding - target encoding (defaults to Encoding.default_external).
    #
    # Raises RuntimeError when the result is not valid in the target
    # encoding; String#encode! may raise its own conversion errors.
    def self.encode(content, encoding = ::Encoding.default_external)
      content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ WINDOWS_PLATFORMS

      original_encoding = content.encoding

      if strip_bom(content) # When the content contains bom, it is UTF-8
        content.force_encoding(::Encoding::UTF_8)
        content.encode!(encoding)
      else
        content.force_encoding(encoding)
      end

      # The bytes are not valid in the target encoding: fall back to a real
      # conversion from the encoding the string arrived with.
      unless content.valid_encoding?
        content.force_encoding(original_encoding)
        content.encode!(encoding)
      end

      # The message used to interpolate `file_path`, which does not exist in
      # this method and turned the failure into a NameError.
      raise "Unable to convert content to #{encoding}" unless content.valid_encoding?

      content
    end

    # NOTE: this method was preceded by a bare `private`, which has no effect
    # on `def self.` methods. It stays public as before.

    # Removes a leading UTF-8 byte order mark in place.
    #
    # Returns the content when a BOM was removed, nil otherwise.
    def self.strip_bom(content)
      return nil unless content.byteslice(0, UTF8_BOM.bytesize).b == UTF8_BOM

      encoding = content.encoding
      # Cut on byte boundaries, then restore the original encoding tag.
      content.force_encoding(::Encoding::BINARY)
      content.slice!(0, UTF8_BOM.bytesize)
      content.force_encoding(encoding)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::C < CommentExtractor::Extractor # Extractor declaration for C sources.
4
+ filename /\.c$/ # pattern for file names ending in ".c"
5
+ filetype 'c' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
8
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Cc < CommentExtractor::Extractor # Extractor declaration for ".cc" sources.
4
+ filename /\.cc$/ # pattern for file names ending in ".cc"
5
+ filetype 'cc' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
8
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Class < CommentExtractor::Extractor # Extractor declaration for ".class" files.
4
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
5
+
6
+ filename /\.class$/ # pattern for file names ending in ".class"
7
+ filetype 'class' # file type name
8
+ end
@@ -0,0 +1,11 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Clojure < CommentExtractor::Extractor # Extractor declaration for Clojure sources.
4
+ include CommentExtractor::Extractor::Concerns::SimpleExtractor # provides the declaration DSL used below
5
+
6
+ filename /\.clj$/ # pattern for file names ending in ".clj"
7
+ filetype 'clojure' # file type name
8
+
9
+ define_default_bracket # treat "..." and '...' as brackets so their content is skipped
10
+ comment start_with: /;+/ # one-line comment starting with one or more semicolons
11
+ end
@@ -0,0 +1,13 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Coffee < CommentExtractor::Extractor # Extractor declaration for CoffeeScript sources.
4
+ include CommentExtractor::Extractor::Concerns::SimpleExtractor # provides the declaration DSL used below
5
+
6
+ filename /\.coffee$/ # pattern for file names ending in ".coffee"
7
+ filetype 'coffee' # file type name
8
+
9
+ define_default_bracket # treat "..." and '...' as brackets so their content is skipped
10
+ define_regexp_bracket # also skip /.../ regexp literals
11
+ comment start_with: '###', end_with: '###', type: BLOCK_COMMENT # declared before '#': rules are tried in declaration order
12
+ comment start_with: '#' # one-line comment
13
+ end
@@ -0,0 +1,189 @@
1
+ require 'comment_extractor/smart_string_scanner'
2
+
3
+ using CommentExtractor::SmartStringScanner
4
+
5
class CommentExtractor::Extractor
  module Concerns
    # Mixin for extractors that can be described declaratively: which
    # patterns start (and end) a comment, which bracket pairs (e.g. string
    # literals) must be skipped, and which patterns are ignored outright.
    #
    # The declarations are stored in instance variables of the including
    # class; the instance methods below read them while scanning. The host
    # is expected to provide `scanner`, `code_objects`, `build_comment` and
    # `raise_report` (defined outside this file).
    module SimpleExtractor
      include CommentExtractor::CodeObject::Comment::Type

      # Gives the including class or module the declaration DSL.
      def self.included(k)
        k.extend(ClassMethods)
      end

      # Defines instance readers that return the class-level declaration
      # list of the same name, or an empty array when nothing was declared.
      def self.attr_definition(*keys)
        keys.each do |key|
          define_method key do
            self.class.instance_variable_get("@#{key}") || []
          end
        end
      end
      attr_definition :brackets, :comment_regexp,
                      :ignore_patterns, :complicate_conditions

      # Declaration DSL available on every class/module that includes
      # SimpleExtractor.
      module ClassMethods
        include CommentExtractor::CodeObject::Comment::Type

        # Runs when a module that was extended with this DSL is itself
        # included somewhere: hands its declarations (the very same objects,
        # not copies) over to the includer.
        def included(k)
          instance_variables.each do |key|
            k.instance_variable_set(key, instance_variable_get(key))
          end
        end

        # Declares a comment rule.
        #
        # start_with - String or Regexp that opens the comment (required).
        # end_with   - String or Regexp that closes a block comment.
        # type       - ONE_LINER_COMMENT (default) or BLOCK_COMMENT.
        #
        # Raises ArgumentError when start_with or type is missing.
        def comment(start_with: nil, end_with: nil, type: ONE_LINER_COMMENT)
          @comment_regexp ||= []
          raise ArgumentError unless [type, start_with].all?

          definition = { start_with: build_regexp(start_with), type: type, end_with: end_with }

          # Block comments may span lines, so "." has to match newlines.
          if type == BLOCK_COMMENT
            definition[:end_with] = build_regexp(end_with, Regexp::MULTILINE)
          end
          @comment_regexp << definition
        end

        # Declares patterns that are consumed without producing a comment.
        def define_ignore_patterns(*patterns)
          @ignore_patterns ||= []
          @ignore_patterns += patterns
        end

        # Declares a bracket whose opening and closing token are the same
        # (e.g. a quote). The closing token must not follow a backslash.
        #
        # bracket - String or Regexp.
        # options - Regexp options applied to the closing pattern.
        def define_bracket(bracket, options = 0)
          start_regexp = build_regexp(bracket)
          stop_regexp = if bracket.is_a?(Regexp)
                          join_regexp(/(?<!\\)/, bracket)
                        else
                          /(?<!\\)#{bracket}/
                        end
          stop_regexp = Regexp.new(stop_regexp.source, options)
          append_bracket(start_regexp, stop_regexp)
        end

        # Declares /.../ regexp literals as a bracket ("//" is not one).
        def define_regexp_bracket
          append_bracket(%r!/(?=[^/])!, /(?<!\\)\//)
        end

        # Declares double and single quotes as brackets; the content may
        # span lines.
        def define_default_bracket
          define_bracket('"', Regexp::MULTILINE)
          define_bracket("'", Regexp::MULTILINE)
        end

        # Stores a bracket pair given as ready-made patterns.
        def append_bracket(start_with, end_with)
          @brackets ||= []
          @brackets << { start_with: start_with, end_with: end_with }
        end

        # Registers a block that is instance_eval'ed on the extractor while
        # scanning, for rules the declarative DSL cannot express.
        def define_complicate_condition(&proc_object)
          @complicate_conditions ||= []
          @complicate_conditions << proc_object
        end

        private

        # Concatenates the sources of the given regexps into a new regexp.
        def join_regexp(*regexp)
          # [review] - Should I ignore regexp options?
          Regexp.new(regexp.map { |v| v.source }.inject(:+))
        end

        # Builds a regexp from a String or from the source of a Regexp.
        def build_regexp(str_or_reg, type = 0)
          str_or_reg = str_or_reg.source if str_or_reg.respond_to?(:source)
          Regexp.new(str_or_reg, type)
        end
      end

      # Walks the scanner to the end of the input. At every position the
      # rules are tried in this order: ignore patterns, complicate
      # conditions, comments, brackets, line break, any single character.
      def scan
        until scanner.eos?
          case
          when scan_ignore_patterns
            next
          when scan_complicate_conditions
            next
          when scan_comment
            next
          when scan_bracket
            next
          when scanner.scan(CommentExtractor::Extractor::REGEXP[:BREAK])
            next
          when scanner.scan(/./)
            next
          else
            raise_report
          end
        end
      end

      private

      # Evaluates the registered blocks until one returns a truthy value.
      #
      # NOTE(review): this always returns nil, so the `when` branch in #scan
      # is never taken and scanning continues with the following rules at
      # the (possibly advanced) position. Returning true instead would loop
      # forever if a block is truthy without consuming input, so the
      # behavior is left unchanged. Confirm the intent.
      def scan_complicate_conditions
        complicate_conditions.each do |proc_object|
          return if instance_eval(&proc_object)
        end

        nil
      end

      # Skips a bracketed span. Returns the scanned rest of the span, or nil
      # when no bracket opens here or the closing token is not found.
      def scan_bracket
        brackets.each do |definition|
          start_with = definition[:start_with]
          end_with = definition[:end_with]
          next unless scanner.scan(start_with)

          new_regexp = Regexp.new(/.*?/.source + end_with.source, end_with.options)
          return scanner.scan(new_regexp)
        end

        nil
      end

      # Consumes the first matching ignore pattern. Returns true or nil.
      def scan_ignore_patterns
        ignore_patterns.each do |pattern|
          return true if scanner.scan(pattern)
        end

        nil
      end

      # Tries the comment rules in declaration order and records the comment
      # of the first rule whose start pattern matches. Returns nil when no
      # rule matches.
      def scan_comment
        comment_regexp.each do |definition|
          next unless scanner.scan(definition[:start_with])

          result = case definition[:type]
                   when ONE_LINER_COMMENT
                     identify_single_line_comment
                   when BLOCK_COMMENT
                     identify_multi_line_comment(definition[:end_with])
                   else
                     raise_report
                   end

          return result
        end

        nil
      end

      # Records the rest of the current line as a one-line comment.
      def identify_single_line_comment
        line_number = scanner.current_line
        comment = scanner.scan(/^.*$/)
        metadata = { type: ONE_LINER_COMMENT }
        comment_object = build_comment(line_number, comment, **metadata)

        code_objects << comment_object
      end

      # Records every line of a block comment as its own comment object.
      #
      # regexp - pattern that closes the block comment.
      def identify_multi_line_comment(regexp)
        line_no = scanner.current_line
        stop_regexp = Regexp.new(/.*?/.source + regexp.source, regexp.options)
        # Without a closing token the scan returns nil. Treat the rest of
        # the input as the comment instead of failing with NoMethodError.
        comment_block = scanner.scan(stop_regexp) || scanner.scan(/.*/m)

        # Drop the closing token before splitting into lines.
        remove_tail_regexp = Regexp.new(regexp.source + /$/.source)
        comments = comment_block.sub(remove_tail_regexp, '').split("\n")
        comments.each_with_index do |comment, index|
          metadata = { type: BLOCK_COMMENT }
          # Pass the metadata as keywords, as #identify_single_line_comment does.
          code_objects << build_comment(line_no + index, comment, **metadata)
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'comment_extractor/extractor'
2
+ require 'comment_extractor/extractor/concerns/simple_extractor'
3
+
4
+ module CommentExtractor
5
+ class Extractor
6
+ module Concerns
7
+ module SlashExtractor # Shared rules for file types with // one-line and /* */ block comments.
8
+ include CommentExtractor::Extractor::Concerns::SimpleExtractor # provides the declaration DSL used below
9
+
10
+ define_default_bracket # treat "..." and '...' as brackets so their content is skipped
11
+ comment start_with: /\/\// # one-line comment: // ...
12
+ comment start_with: /\/\*/, end_with: /\*\//, type: BLOCK_COMMENT # block comment: /* ... */
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Cpp < CommentExtractor::Extractor # Extractor declaration for ".cpp" sources.
4
+ filename /\.cpp$/ # pattern for file names ending in ".cpp"
5
+ filetype 'cpp' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
8
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Cs < CommentExtractor::Extractor # Extractor declaration for ".cs" sources.
4
+ filename /\.cs$/ # pattern for file names ending in ".cs"
5
+ filetype 'cs' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
8
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Css < CommentExtractor::Extractor # Extractor declaration for ".css" files.
4
+ filename /\.css$/ # pattern for file names ending in ".css"
5
+ filetype 'css' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # NOTE(review): also declares "//" as a comment start, which may match inside unquoted URLs - confirm this is wanted for CSS
8
+ end
@@ -0,0 +1,8 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::Cxx < CommentExtractor::Extractor # Extractor declaration for ".cxx" sources.
4
+ filename /\.cxx$/ # pattern for file names ending in ".cxx"
5
+ filetype 'cxx' # file type name
6
+
7
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
8
+ end
@@ -0,0 +1,9 @@
1
+ require 'comment_extractor/extractor'
2
+
3
+ class CommentExtractor::Extractor::D < CommentExtractor::Extractor # Extractor declaration for D sources.
4
+ shebang /\/dmd/ # pattern for shebang lines containing "/dmd"
5
+ filename /\.d$/ # pattern for file names ending in ".d"
6
+ filetype 'd' # file type name
7
+
8
+ include CommentExtractor::Extractor::Concerns::SlashExtractor # brings in the // and /* */ comment rules
9
+ end