moss_ruby 1.0.0 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moss_ruby.rb +86 -19
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de73a58e506ce9d6bd301468837e0015172bd9e8
|
4
|
+
data.tar.gz: 6a42a22ed34ef12ee88520f98679b3b05c67ba72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 542d17634cb116fee0856d008be17594256d0f5440f776284a94ca93ffaa655fdf2279bd65019142aeff1610373b8f6091644f789f6e6f0a74adc83535ea4a11
|
7
|
+
data.tar.gz: 0f72cea79daa7112c91f094c12d28ba27a2b2bf73f2ca7fe0a601db04717d21e47b793ebd8a7608253048fda4ec69103a1adbe0bd562a3daa2183fd3710c72cc
|
data/lib/moss_ruby.rb
CHANGED
@@ -68,14 +68,25 @@ class MossRuby
|
|
68
68
|
content = IO.read(filename)
|
69
69
|
size = content.length
|
70
70
|
|
71
|
-
|
72
|
-
|
71
|
+
if size > 0
|
72
|
+
moss_server.write "file #{id} #{@options[:language]} #{size} #{file}\n"
|
73
|
+
moss_server.write content
|
74
|
+
end
|
75
|
+
puts "File upload size #{filename} #{size}"
|
76
|
+
begin
|
77
|
+
line = moss_server.read_nonblock(100)
|
78
|
+
puts "--------------------------------------------"
|
79
|
+
puts " From MOSS: #{line}"
|
80
|
+
puts "--------------------------------------------"
|
81
|
+
rescue IO::WaitReadable
|
82
|
+
#expected
|
83
|
+
end
|
73
84
|
end
|
74
85
|
|
75
|
-
def check(files_dict)
|
86
|
+
def check(files_dict, callback=nil)
|
76
87
|
# Check that the files_dict contains valid filenames
|
77
88
|
files_dict[:base_files].each do |file_search|
|
78
|
-
if Dir
|
89
|
+
if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
|
79
90
|
raise "Unable to locate base file(s) matching #{file_search}"
|
80
91
|
end
|
81
92
|
end
|
@@ -85,15 +96,17 @@ class MossRuby
|
|
85
96
|
end
|
86
97
|
|
87
98
|
files_dict[:files].each do |file_search|
|
88
|
-
if Dir
|
99
|
+
if Dir.glob(file_search, File::FNM_CASEFOLD).length == 0
|
89
100
|
raise "Unable to locate base file(s) matching #{file_search}"
|
90
101
|
end
|
91
102
|
end
|
92
103
|
|
93
104
|
# Connect to the server
|
105
|
+
callback.call('Connecting to MOSS') unless callback.nil?
|
94
106
|
moss_server = TCPSocket.new @server, @port
|
95
107
|
begin
|
96
108
|
# Send header details
|
109
|
+
callback.call(' - Sending configuration details') unless callback.nil?
|
97
110
|
moss_server.write "moss #{@userid}\n"
|
98
111
|
moss_server.write "directory #{@options[:directory_submission] ? 1 : 0 }\n"
|
99
112
|
moss_server.write "X #{@options[:experimental_server] ? 1 : 0}\n"
|
@@ -103,26 +116,43 @@ class MossRuby
|
|
103
116
|
# Send language option
|
104
117
|
moss_server.write "language #{@options[:language]}\n"
|
105
118
|
|
119
|
+
callback.call(' - Checking language') unless callback.nil?
|
106
120
|
line = moss_server.gets
|
107
121
|
if line.strip() != "yes"
|
108
122
|
moss_server.write "end\n"
|
109
123
|
raise "Invalid language option."
|
110
124
|
end
|
111
125
|
|
112
|
-
|
113
|
-
|
126
|
+
count = 1
|
127
|
+
processing = files_dict[:base_files]
|
128
|
+
processing.each do |file_search|
|
129
|
+
callback.call(" - Sending base files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
|
130
|
+
files = Dir.glob(file_search, File::FNM_CASEFOLD)
|
131
|
+
file_count = 1
|
132
|
+
files.each do |file|
|
133
|
+
callback.call(" - Base file #{file_count} of #{files.count} - #{file}") unless callback.nil?
|
114
134
|
upload_file moss_server, file
|
135
|
+
file_count += 1
|
115
136
|
end
|
137
|
+
count += 1
|
116
138
|
end
|
117
139
|
|
118
140
|
idx = 1
|
119
|
-
|
120
|
-
|
141
|
+
count = 1
|
142
|
+
processing = files_dict[:files]
|
143
|
+
processing.each do |file_search|
|
144
|
+
callback.call(" - Sending files #{count} of #{processing.count} - #{file_search}") unless callback.nil?
|
145
|
+
files = Dir.glob(file_search, File::FNM_CASEFOLD)
|
146
|
+
file_count = 1
|
147
|
+
files.each do |file|
|
148
|
+
callback.call(" - File #{idx} = #{file_count} of #{files.count} - #{file}") unless callback.nil?
|
121
149
|
upload_file moss_server, file, idx
|
122
150
|
idx += 1
|
151
|
+
file_count += 1
|
123
152
|
end
|
124
153
|
end
|
125
154
|
|
155
|
+
callback.call(" - Waiting for server response") unless callback.nil?
|
126
156
|
moss_server.write "query 0 #{@options[:comment]}\n"
|
127
157
|
|
128
158
|
result = moss_server.gets
|
@@ -134,17 +164,29 @@ class MossRuby
|
|
134
164
|
end
|
135
165
|
end
|
136
166
|
|
137
|
-
def extract_results(uri)
|
167
|
+
def extract_results(uri, min_pct = 10, callback = nil)
|
138
168
|
result = Array.new
|
139
169
|
begin
|
140
|
-
match =
|
170
|
+
match = -1
|
141
171
|
match_file = Array.new
|
142
172
|
data = Array.new
|
143
|
-
|
173
|
+
to_fetch = get_matches(uri, min_pct, callback)
|
174
|
+
to_fetch.each do |id|
|
175
|
+
match += 1
|
176
|
+
callback.call("Checking match #{match + 1} (id #{id})") unless callback.nil?
|
177
|
+
|
144
178
|
# read the two match files
|
145
|
-
match_top = open("#{uri}/match#{
|
146
|
-
|
147
|
-
|
179
|
+
match_top = open("#{uri}/match#{id}-top.html").read
|
180
|
+
|
181
|
+
callback.call(" - checking match #{match} percents") unless callback.nil?
|
182
|
+
top = read_pcts match_top
|
183
|
+
|
184
|
+
next if Integer(top[:pct0]) < min_pct && Integer(top[:pct1]) < min_pct
|
185
|
+
|
186
|
+
callback.call(" - fetching #{match} html") unless callback.nil?
|
187
|
+
|
188
|
+
match_file[0] = open("#{uri}/match#{id}-0.html").read
|
189
|
+
match_file[1] = open("#{uri}/match#{id}-1.html").read
|
148
190
|
|
149
191
|
# puts match_top
|
150
192
|
# puts "---FILE0\n\n"
|
@@ -152,10 +194,12 @@ class MossRuby
|
|
152
194
|
# puts "---FILE1\n\n"
|
153
195
|
# puts match_file[1]
|
154
196
|
|
197
|
+
callback.call(" - extracting data for #{match}") unless callback.nil?
|
198
|
+
|
155
199
|
data[0] = read_data match_file[0]
|
156
200
|
data[1] = read_data match_file[1]
|
157
|
-
|
158
|
-
|
201
|
+
|
202
|
+
callback.call(" - adding #{match} result") unless callback.nil?
|
159
203
|
result << [
|
160
204
|
{
|
161
205
|
filename: data[0][:filename],
|
@@ -168,8 +212,6 @@ class MossRuby
|
|
168
212
|
pct: Integer(top[:pct1])
|
169
213
|
}
|
170
214
|
]
|
171
|
-
|
172
|
-
match += 1
|
173
215
|
end
|
174
216
|
rescue OpenURI::HTTPError
|
175
217
|
# End when there are no more matches -- indicated by a 404 when accessing match<n>-top.html
|
@@ -180,6 +222,31 @@ class MossRuby
|
|
180
222
|
|
181
223
|
private
|
182
224
|
|
225
|
+
# <tr><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4936175/27258/ (51%)</a>
|
226
|
+
# </td><td><a href="http://moss.stanford.edu/results/916994582/match0.html">./done/COS10009-5/4928431/34630/ (51%)</a>
|
227
|
+
# </td><td align="right">80
|
228
|
+
# </td></tr>
|
229
|
+
|
230
|
+
def get_matches(uri, min_pct, callback)
|
231
|
+
result = Array.new
|
232
|
+
begin
|
233
|
+
callback.call(" - Reading match data") unless callback.nil?
|
234
|
+
page = open("#{uri}").read
|
235
|
+
regex = /<TR><TD><A HREF=".*?match(?<match_id>\d+).html">.*\((?<pct0>\d+)%\)<\/A>\n.*?<TD><A.*?((?<pct0>\d+)%\))/i
|
236
|
+
# puts "scanning page"
|
237
|
+
page.scan(regex).each do | match |
|
238
|
+
id, pct0, pct1 = match
|
239
|
+
# puts "#{id}, #{pct0}, #{pct1}"
|
240
|
+
if Integer(pct0) >= min_pct || Integer(pct1) >= min_pct
|
241
|
+
result << id
|
242
|
+
end
|
243
|
+
end
|
244
|
+
callback.call(" - Found #{result.count} match with at least #{min_pct}% similar") unless callback.nil?
|
245
|
+
rescue
|
246
|
+
end
|
247
|
+
result
|
248
|
+
end
|
249
|
+
|
183
250
|
def strip_a(html)
|
184
251
|
html.gsub(/<A.*?>.*?<\/A>/, '')
|
185
252
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: moss_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Cain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Moss-ruby is an unofficial ruby gem for the Moss system for Detecting
|
14
14
|
Software Plagiarism (http://theory.stanford.edu/~aiken/moss/)
|