moss_ruby 1.0.0 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/moss_ruby.rb +86 -19
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 661f6ad9b42d35f82f5717e20785c4437c672c7a
4
- data.tar.gz: 32f0decb5024d8baf1afb356b341da5a7e98b959
3
+ metadata.gz: de73a58e506ce9d6bd301468837e0015172bd9e8
4
+ data.tar.gz: 6a42a22ed34ef12ee88520f98679b3b05c67ba72
5
5
  SHA512:
6
- metadata.gz: 0c2439a3d832bbf53f15e20c67707b2beb4ff16a4b03c412bc8af9ac4fc1548d586eb8b26d1d05ebf312ed2e9d83502af8c5424d1314a87f0ae2175bf8bcc79a
7
- data.tar.gz: d2e039a786dce1a571bcc1b7e60882acc3502194794ceeacd4ae33326883f82a2bbcfae910e44d5a29fa9c5dd181dd60db19eed52fca35c2c5ff8883ca737c18
6
+ metadata.gz: 542d17634cb116fee0856d008be17594256d0f5440f776284a94ca93ffaa655fdf2279bd65019142aeff1610373b8f6091644f789f6e6f0a74adc83535ea4a11
7
+ data.tar.gz: 0f72cea79daa7112c91f094c12d28ba27a2b2bf73f2ca7fe0a601db04717d21e47b793ebd8a7608253048fda4ec69103a1adbe0bd562a3daa2183fd3710c72cc
data/lib/moss_ruby.rb CHANGED
@@ -68,14 +68,25 @@ class MossRuby
68
68
  content = IO.read(filename)
69
69
  size = content.length
70
70
 
71
- moss_server.write "file #{id} #{@options[:language]} #{size} #{file}\n"
72
- moss_server.write content
71
+ if size > 0
72
+ moss_server.write "file #{id} #{@options[:language]} #{size} #{file}\n"
73
+ moss_server.write content
74
+ end
75
+ puts "File upload size #{filename} #{size}"
76
+ begin
77
+ line = moss_server.read_nonblock(100)
78
+ puts "--------------------------------------------"
79
+ puts " From MOSS: #{line}"
80
+ puts "--------------------------------------------"
81
+ rescue IO::WaitReadable
82
+ #expected
83
+ end
73
84
  end
74
85
 
75
- def check(files_dict)
86
+ def check(files_dict, callback=nil)
76
87
  # Chech that the files_dict contains valid filenames
77
88
  files_dict[:base_files].each do |file_search|
78
- if Dir[file_search].length == 0
89
+ if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
79
90
  raise "Unable to locate base file(s) matching #{file_search}"
80
91
  end
81
92
  end
@@ -85,15 +96,17 @@ class MossRuby
85
96
  end
86
97
 
87
98
  files_dict[:files].each do |file_search|
88
- if Dir[file_search].length == 0
99
+ if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
89
100
  raise "Unable to locate base file(s) matching #{file_search}"
90
101
  end
91
102
  end
92
103
 
93
104
  # Connect to the server
105
+ callback.call('Connecting to MOSS') unless callback.nil?
94
106
  moss_server = TCPSocket.new @server, @port
95
107
  begin
96
108
  # Send header details
109
+ callback.call(' - Sending configuration details') unless callback.nil?
97
110
  moss_server.write "moss #{@userid}\n"
98
111
  moss_server.write "directory #{@options[:directory_submission] ? 1 : 0 }\n"
99
112
  moss_server.write "X #{@options[:experimental_server] ? 1 : 0}\n"
@@ -103,26 +116,43 @@ class MossRuby
103
116
  # Send language option
104
117
  moss_server.write "language #{@options[:language]}\n"
105
118
 
119
+ callback.call(' - Checking language') unless callback.nil?
106
120
  line = moss_server.gets
107
121
  if line.strip() != "yes"
108
122
  moss_server.write "end\n"
109
123
  raise "Invalid language option."
110
124
  end
111
125
 
112
- files_dict[:base_files].each do |file_search|
113
- Dir[file_search].each do |file|
126
+ count = 1
127
+ processing = files_dict[:base_files]
128
+ processing.each do |file_search|
129
+ callback.call(" - Sending base files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
130
+ files = Dir.glob(file_search, File::FNM_CASEFOLD)
131
+ file_count = 1
132
+ files.each do |file|
133
+ callback.call(" - Base file #{file_count} of #{files.count} - #{file}") unless callback.nil?
114
134
  upload_file moss_server, file
135
+ file_count += 1
115
136
  end
137
+ count += 1
116
138
  end
117
139
 
118
140
  idx = 1
119
- files_dict[:files].each do |file_search|
120
- Dir[file_search].each do |file|
141
+ count = 1
142
+ processing = files_dict[:files]
143
+ processing.each do |file_search|
144
+ callback.call(" - Sending files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
145
+ files = Dir.glob(file_search, File::FNM_CASEFOLD)
146
+ file_count = 1
147
+ files.each do |file|
148
+ callback.call(" - File #{idx} = #{file_count} of #{files.count} - #{file}") unless callback.nil?
121
149
  upload_file moss_server, file, idx
122
150
  idx += 1
151
+ file_count += 1
123
152
  end
124
153
  end
125
154
 
155
+ callback.call(" - Waiting for server response") unless callback.nil?
126
156
  moss_server.write "query 0 #{@options[:comment]}\n"
127
157
 
128
158
  result = moss_server.gets
@@ -134,17 +164,29 @@ class MossRuby
134
164
  end
135
165
  end
136
166
 
137
- def extract_results(uri)
167
+ def extract_results(uri, min_pct = 10, callback = nil)
138
168
  result = Array.new
139
169
  begin
140
- match = 0
170
+ match = -1
141
171
  match_file = Array.new
142
172
  data = Array.new
143
- while true
173
+ to_fetch = get_matches(uri, min_pct, callback)
174
+ to_fetch.each do |id|
175
+ match += 1
176
+ callback.call("Checking match #{match + 1} (id #{id})") unless callback.nil?
177
+
144
178
  # read the two match files
145
- match_top = open("#{uri}/match#{match}-top.html").read
146
- match_file[0] = open("#{uri}/match#{match}-0.html").read
147
- match_file[1] = open("#{uri}/match#{match}-1.html").read
179
+ match_top = open("#{uri}/match#{id}-top.html").read
180
+
181
+ callback.call(" - checking match #{match} percents") unless callback.nil?
182
+ top = read_pcts match_top
183
+
184
+ next if Integer(top[:pct0]) < min_pct && Integer(top[:pct1]) < min_pct
185
+
186
+ callback.call(" - fetching #{match} html") unless callback.nil?
187
+
188
+ match_file[0] = open("#{uri}/match#{id}-0.html").read
189
+ match_file[1] = open("#{uri}/match#{id}-1.html").read
148
190
 
149
191
  # puts match_top
150
192
  # puts "---FILE0\n\n"
@@ -152,10 +194,12 @@ class MossRuby
152
194
  # puts "---FILE1\n\n"
153
195
  # puts match_file[1]
154
196
 
197
+ callback.call(" - extracting data for #{match}") unless callback.nil?
198
+
155
199
  data[0] = read_data match_file[0]
156
200
  data[1] = read_data match_file[1]
157
- top = read_pcts match_top
158
-
201
+
202
+ callback.call(" - adding #{match} result") unless callback.nil?
159
203
  result << [
160
204
  {
161
205
  filename: data[0][:filename],
@@ -168,8 +212,6 @@ class MossRuby
168
212
  pct: Integer(top[:pct1])
169
213
  }
170
214
  ]
171
-
172
- match += 1
173
215
  end
174
216
  rescue OpenURI::HTTPError
175
217
  #end when there are no more matches -- indicated by 404 when accessing matches-n-top.html
@@ -180,6 +222,31 @@ class MossRuby
180
222
 
181
223
  private
182
224
 
225
+ # <tr><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4936175/27258/ (51%)</a>
226
+ # </td><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4928431/34630/ (51%)</a>
227
+ # </td><td align="right">80
228
+ # </td></tr>
229
+
230
+ def get_matches(uri, min_pct, callback)
231
+ result = Array.new
232
+ begin
233
+ callback.call(" - Reading match data") unless callback.nil?
234
+ page = open("#{uri}").read
235
+ regex = /<TR><TD><A HREF=".*?match(?<match_id>\d+).html">.*\((?<pct0>\d+)%\)<\/A>\n.*?<TD><A.*?((?<pct0>\d+)%\))/i
236
+ # puts "scanning page"
237
+ page.scan(regex).each do | match |
238
+ id, pct0, pct1 = match
239
+ # puts "#{id}, #{pct0}, #{pct1}"
240
+ if Integer(pct0) >= min_pct || Integer(pct1) >= min_pct
241
+ result << id
242
+ end
243
+ end
244
+ callback.call(" - Found #{result.count} match with at least #{min_pct}% similar") unless callback.nil?
245
+ rescue
246
+ end
247
+ result
248
+ end
249
+
183
250
  def strip_a(html)
184
251
  html.gsub(/<A.*?>.*?<\/A>/, '')
185
252
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moss_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Cain
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-01 00:00:00.000000000 Z
11
+ date: 2015-06-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Moss-ruby is an unofficial ruby gem for the Moss system for Detecting
14
14
  Software Plagiarism (http://theory.stanford.edu/~aiken/moss/)