cmp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/cmp.rb +101 -0
- metadata +43 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4b5c0414c942c857a400ec389fc1a4a41bd7f4b8
|
4
|
+
data.tar.gz: 8f85a4403227992c75fec44ab418a3e94b1b3729
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aec7a9c36a8b9a3b9df06413ffe3e78c5107ab4ec1c87efd21984b111385d1f8f0e0ec9279b49857740342bd6e99b12d2e911057d4649273742acd6167868a41
|
7
|
+
data.tar.gz: bb78d0697b03f23e9a00674e04415c2a4645e00a0e814393f94a1283790218b229cd7ba9d653c117b8e0d2ebd2e0e54c17f7c950905a0c532f71fb44d7d1d32c
|
data/lib/cmp.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
module DoubleText
  # Estimates how similar two texts are by greedily pairing up runs of
  # characters that occur in both of them.
  class Analysis
    class << self
      # Details of the most recent {txt_cmp} call, as a Hash with the keys
      # +:or_text_1+ / +:or_text_2+ (the two texts that were compared),
      # +:succ_char+ (the runs that were matched, in the order found) and
      # +:failed_char+ (the non-empty leftovers: second text first, then the
      # first text).
      attr_accessor :log_text

      # Resolves an argument of {txt_cmp} to the text that should be compared.
      #
      # @param file [String, #to_path, #to_s] path of an existing regular
      #   file, or the literal text itself
      # @return [String] for an existing file: its content with surrounding
      #   whitespace stripped and every single quote removed; otherwise a
      #   fresh copy of +file.to_s+
      def open(file)
        # Only path-like values are probed on disk; File.file? raises a
        # TypeError for things like Integers, which are treated as text.
        path_like = file.is_a?(String) || file.respond_to?(:to_path)
        return file.to_s.dup unless path_like && File.file?(file)

        # File.read closes the handle itself (File.open(...).read leaked it).
        File.read(file).strip.delete("'")
      end

      # Compares two texts (or the contents of two files) and reports how
      # much of the longer one is covered by runs shared with the other.
      #
      # The first text is scanned left to right. At each position the longest
      # run that still occurs in the not-yet-matched part of the second text
      # is taken, recorded, and removed from that remainder, so no stretch of
      # the second text is counted twice. Runs of a single character only
      # count when the shorter text is itself one character long.
      #
      # Side effect: replaces {log_text} with the details of this comparison.
      #
      # @param f0 [String, #to_s] first text, or path of a file holding it
      # @param f1 [String, #to_s] second text, or path of a file holding it
      # @return [String] matched characters as a percentage of the longer
      #   text, rounded to 3 decimals and suffixed with "%" (e.g. "66.667%");
      #   "0.0%" when both texts are empty
      def txt_cmp(f0, f1)
        text0 = open(f0)
        text1 = open(f1)

        @log_text = {
          or_text_1: text0.dup,
          or_text_2: text1.dup,
          succ_char: [],
          failed_char: []
        }

        min_run = [text0.size, text1.size].min == 1 ? 1 : 2
        pool = text1.dup # part of the second text that is still unmatched
        unmatched = []   # characters of the first text that found no partner
        pos = 0

        while pos < text0.size && !pool.empty?
          run_length = longest_run_length(text0, pos, pool)
          if run_length >= min_run
            run = text0[pos, run_length]
            pool = pool.sub(run, '') # String pattern: literal, first hit only
            @log_text[:succ_char] << run
            pos += run_length
          else
            unmatched << text0[pos]
            pos += 1
          end
        end
        # Whatever was not scanned (second text exhausted early) is unmatched.
        unmatched << text0[pos..-1]

        @log_text[:failed_char] << pool << unmatched.join
        @log_text[:failed_char].delete('')

        longest = [text0.size, text1.size].max
        # Nothing to compare: avoid 0.0 / 0.0, which would render as "NaN%".
        return '0.0%' if longest.zero?

        matched = @log_text[:succ_char].join.size
        "#{(matched.to_f / longest * 100).round(3)}%"
      end

      private

      # Length of the longest substring of +source+ beginning at +start+ that
      # occurs somewhere in +pool+ (0 when not even one character does).
      def longest_run_length(source, start, pool)
        limit = source.size - start
        length = 0
        length += 1 while length < limit && pool.include?(source[start, length + 1])
        length
      end
    end
  end
end
|
59
|
+
|
60
|
+
# (puts "你必须输入两个参数(文件或者字符串)";exit) if ARGV.size != 2
|
61
|
+
# result = DoubleText::Analysis.txt_cmp(f0=ARGV[0],f1=ARGV[1])
|
62
|
+
# p({
|
63
|
+
# result: "#{f0} 与 #{f1} 匹配度 是 #{result}",
|
64
|
+
# data: DoubleText::Analysis.log_text
|
65
|
+
# })
|
66
|
+
|
67
|
+
# =============================>
|
68
|
+
# simple test
|
69
|
+
# result = DoubleText::Analysis.txt_cmp('aa','aa')
|
70
|
+
# p({
|
71
|
+
# result: " 匹配度 是 #{result}",
|
72
|
+
# data: DoubleText::Analysis.log_text
|
73
|
+
# })
|
74
|
+
#
|
75
|
+
# result = DoubleText::Analysis.txt_cmp('aaa','aa')
|
76
|
+
# p({
|
77
|
+
# result: " 匹配度 是 #{result}",
|
78
|
+
# data: DoubleText::Analysis.log_text
|
79
|
+
# })
|
80
|
+
#
|
81
|
+
# result = DoubleText::Analysis.txt_cmp('aa','aaa')
|
82
|
+
# p({
|
83
|
+
# result: " 匹配度 是 #{result}",
|
84
|
+
# data: DoubleText::Analysis.log_text
|
85
|
+
# })
|
86
|
+
#
|
87
|
+
# result = DoubleText::Analysis.txt_cmp('a','a')
|
88
|
+
# p({
|
89
|
+
# result: " 匹配度 是 #{result}",
|
90
|
+
# data: DoubleText::Analysis.log_text
|
91
|
+
# })
|
92
|
+
#
|
93
|
+
# result = DoubleText::Analysis.txt_cmp('aaaaaaa','aaaaaaa')
|
94
|
+
# p({
|
95
|
+
# result: " 匹配度 是 #{result}",
|
96
|
+
# data: DoubleText::Analysis.log_text
|
97
|
+
# })
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
|
metadata
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cmp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- mico_xiaozhen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: 比较两个文件中文字相似度,也可以直接比较2段文字
|
14
|
+
email: mico_xiaozhen@sina.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/cmp.rb
|
20
|
+
homepage: http://rubygems.org/mico_xiaozhen/cmp
|
21
|
+
licenses: []
|
22
|
+
metadata: {}
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: '0'
|
32
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
requirements: []
|
38
|
+
rubyforge_project:
|
39
|
+
rubygems_version: 2.6.12
|
40
|
+
signing_key:
|
41
|
+
specification_version: 4
|
42
|
+
summary: 比较两个文件中文字相似度,也可以直接比较2段文字
|
43
|
+
test_files: []
|