moss_ruby 1.0.0 → 1.0.8

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/moss_ruby.rb +86 -19
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 661f6ad9b42d35f82f5717e20785c4437c672c7a
4
- data.tar.gz: 32f0decb5024d8baf1afb356b341da5a7e98b959
3
+ metadata.gz: de73a58e506ce9d6bd301468837e0015172bd9e8
4
+ data.tar.gz: 6a42a22ed34ef12ee88520f98679b3b05c67ba72
5
5
  SHA512:
6
- metadata.gz: 0c2439a3d832bbf53f15e20c67707b2beb4ff16a4b03c412bc8af9ac4fc1548d586eb8b26d1d05ebf312ed2e9d83502af8c5424d1314a87f0ae2175bf8bcc79a
7
- data.tar.gz: d2e039a786dce1a571bcc1b7e60882acc3502194794ceeacd4ae33326883f82a2bbcfae910e44d5a29fa9c5dd181dd60db19eed52fca35c2c5ff8883ca737c18
6
+ metadata.gz: 542d17634cb116fee0856d008be17594256d0f5440f776284a94ca93ffaa655fdf2279bd65019142aeff1610373b8f6091644f789f6e6f0a74adc83535ea4a11
7
+ data.tar.gz: 0f72cea79daa7112c91f094c12d28ba27a2b2bf73f2ca7fe0a601db04717d21e47b793ebd8a7608253048fda4ec69103a1adbe0bd562a3daa2183fd3710c72cc
data/lib/moss_ruby.rb CHANGED
@@ -68,14 +68,25 @@ class MossRuby
68
68
  content = IO.read(filename)
69
69
  size = content.length
70
70
 
71
- moss_server.write "file #{id} #{@options[:language]} #{size} #{file}\n"
72
- moss_server.write content
71
+ if size > 0
72
+ moss_server.write "file #{id} #{@options[:language]} #{size} #{file}\n"
73
+ moss_server.write content
74
+ end
75
+ puts "File upload size #{filename} #{size}"
76
+ begin
77
+ line = moss_server.read_nonblock(100)
78
+ puts "--------------------------------------------"
79
+ puts " From MOSS: #{line}"
80
+ puts "--------------------------------------------"
81
+ rescue IO::WaitReadable
82
+ #expected
83
+ end
73
84
  end
74
85
 
75
- def check(files_dict)
86
+ def check(files_dict, callback=nil)
76
87
  # Chech that the files_dict contains valid filenames
77
88
  files_dict[:base_files].each do |file_search|
78
- if Dir[file_search].length == 0
89
+ if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
79
90
  raise "Unable to locate base file(s) matching #{file_search}"
80
91
  end
81
92
  end
@@ -85,15 +96,17 @@ class MossRuby
85
96
  end
86
97
 
87
98
  files_dict[:files].each do |file_search|
88
- if Dir[file_search].length == 0
99
+ if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
89
100
  raise "Unable to locate base file(s) matching #{file_search}"
90
101
  end
91
102
  end
92
103
 
93
104
  # Connect to the server
105
+ callback.call('Connecting to MOSS') unless callback.nil?
94
106
  moss_server = TCPSocket.new @server, @port
95
107
  begin
96
108
  # Send header details
109
+ callback.call(' - Sending configuration details') unless callback.nil?
97
110
  moss_server.write "moss #{@userid}\n"
98
111
  moss_server.write "directory #{@options[:directory_submission] ? 1 : 0 }\n"
99
112
  moss_server.write "X #{@options[:experimental_server] ? 1 : 0}\n"
@@ -103,26 +116,43 @@ class MossRuby
103
116
  # Send language option
104
117
  moss_server.write "language #{@options[:language]}\n"
105
118
 
119
+ callback.call(' - Checking language') unless callback.nil?
106
120
  line = moss_server.gets
107
121
  if line.strip() != "yes"
108
122
  moss_server.write "end\n"
109
123
  raise "Invalid language option."
110
124
  end
111
125
 
112
- files_dict[:base_files].each do |file_search|
113
- Dir[file_search].each do |file|
126
+ count = 1
127
+ processing = files_dict[:base_files]
128
+ processing.each do |file_search|
129
+ callback.call(" - Sending base files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
130
+ files = Dir.glob(file_search, File::FNM_CASEFOLD)
131
+ file_count = 1
132
+ files.each do |file|
133
+ callback.call(" - Base file #{file_count} of #{files.count} - #{file}") unless callback.nil?
114
134
  upload_file moss_server, file
135
+ file_count += 1
115
136
  end
137
+ count += 1
116
138
  end
117
139
 
118
140
  idx = 1
119
- files_dict[:files].each do |file_search|
120
- Dir[file_search].each do |file|
141
+ count = 1
142
+ processing = files_dict[:files]
143
+ processing.each do |file_search|
144
+ callback.call(" - Sending files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
145
+ files = Dir.glob(file_search, File::FNM_CASEFOLD)
146
+ file_count = 1
147
+ files.each do |file|
148
+ callback.call(" - File #{idx} = #{file_count} of #{files.count} - #{file}") unless callback.nil?
121
149
  upload_file moss_server, file, idx
122
150
  idx += 1
151
+ file_count += 1
123
152
  end
124
153
  end
125
154
 
155
+ callback.call(" - Waiting for server response") unless callback.nil?
126
156
  moss_server.write "query 0 #{@options[:comment]}\n"
127
157
 
128
158
  result = moss_server.gets
@@ -134,17 +164,29 @@ class MossRuby
134
164
  end
135
165
  end
136
166
 
137
- def extract_results(uri)
167
+ def extract_results(uri, min_pct = 10, callback = nil)
138
168
  result = Array.new
139
169
  begin
140
- match = 0
170
+ match = -1
141
171
  match_file = Array.new
142
172
  data = Array.new
143
- while true
173
+ to_fetch = get_matches(uri, min_pct, callback)
174
+ to_fetch.each do |id|
175
+ match += 1
176
+ callback.call("Checking match #{match + 1} (id #{id})") unless callback.nil?
177
+
144
178
  # read the two match files
145
- match_top = open("#{uri}/match#{match}-top.html").read
146
- match_file[0] = open("#{uri}/match#{match}-0.html").read
147
- match_file[1] = open("#{uri}/match#{match}-1.html").read
179
+ match_top = open("#{uri}/match#{id}-top.html").read
180
+
181
+ callback.call(" - checking match #{match} percents") unless callback.nil?
182
+ top = read_pcts match_top
183
+
184
+ next if Integer(top[:pct0]) < min_pct && Integer(top[:pct1]) < min_pct
185
+
186
+ callback.call(" - fetching #{match} html") unless callback.nil?
187
+
188
+ match_file[0] = open("#{uri}/match#{id}-0.html").read
189
+ match_file[1] = open("#{uri}/match#{id}-1.html").read
148
190
 
149
191
  # puts match_top
150
192
  # puts "---FILE0\n\n"
@@ -152,10 +194,12 @@ class MossRuby
152
194
  # puts "---FILE1\n\n"
153
195
  # puts match_file[1]
154
196
 
197
+ callback.call(" - extracting data for #{match}") unless callback.nil?
198
+
155
199
  data[0] = read_data match_file[0]
156
200
  data[1] = read_data match_file[1]
157
- top = read_pcts match_top
158
-
201
+
202
+ callback.call(" - adding #{match} result") unless callback.nil?
159
203
  result << [
160
204
  {
161
205
  filename: data[0][:filename],
@@ -168,8 +212,6 @@ class MossRuby
168
212
  pct: Integer(top[:pct1])
169
213
  }
170
214
  ]
171
-
172
- match += 1
173
215
  end
174
216
  rescue OpenURI::HTTPError
175
217
  #end when there are no more matches -- indicated by 404 when accessing matches-n-top.html
@@ -180,6 +222,31 @@ class MossRuby
180
222
 
181
223
  private
182
224
 
225
+ # <tr><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4936175/27258/ (51%)</a>
226
+ # </td><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4928431/34630/ (51%)</a>
227
+ # </td><td align="right">80
228
+ # </td></tr>
229
+
230
+ def get_matches(uri, min_pct, callback)
231
+ result = Array.new
232
+ begin
233
+ callback.call(" - Reading match data") unless callback.nil?
234
+ page = open("#{uri}").read
235
+ regex = /<TR><TD><A HREF=".*?match(?<match_id>\d+).html">.*\((?<pct0>\d+)%\)<\/A>\n.*?<TD><A.*?((?<pct0>\d+)%\))/i
236
+ # puts "scanning page"
237
+ page.scan(regex).each do | match |
238
+ id, pct0, pct1 = match
239
+ # puts "#{id}, #{pct0}, #{pct1}"
240
+ if Integer(pct0) >= min_pct || Integer(pct1) >= min_pct
241
+ result << id
242
+ end
243
+ end
244
+ callback.call(" - Found #{result.count} match with at least #{min_pct}% similar") unless callback.nil?
245
+ rescue
246
+ end
247
+ result
248
+ end
249
+
183
250
  def strip_a(html)
184
251
  html.gsub(/<A.*?>.*?<\/A>/, '')
185
252
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moss_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Cain
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-01 00:00:00.000000000 Z
11
+ date: 2015-06-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Moss-ruby is an unofficial ruby gem for the Moss system for Detecting
14
14
  Software Plagiarism (http://theory.stanford.edu/~aiken/moss/)