cmp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/cmp.rb +101 -0
  3. metadata +43 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4b5c0414c942c857a400ec389fc1a4a41bd7f4b8
4
+ data.tar.gz: 8f85a4403227992c75fec44ab418a3e94b1b3729
5
+ SHA512:
6
+ metadata.gz: aec7a9c36a8b9a3b9df06413ffe3e78c5107ab4ec1c87efd21984b111385d1f8f0e0ec9279b49857740342bd6e99b12d2e911057d4649273742acd6167868a41
7
+ data.tar.gz: bb78d0697b03f23e9a00674e04415c2a4645e00a0e814393f94a1283790218b229cd7ba9d653c117b8e0d2ebd2e0e54c17f7c950905a0c532f71fb44d7d1d32c
data/lib/cmp.rb ADDED
@@ -0,0 +1,101 @@
1
module DoubleText
  # Estimates how similar two texts are by greedily matching runs of
  # characters from the first text against the second text.
  #
  # The details of the most recent comparison are kept in the class-level
  # {log_text} hash, which is overwritten by every call to {txt_cmp}.
  class Analysis
    class << self
      # Details of the most recent {txt_cmp} call:
      #   :or_text_1   - first text as it was compared
      #   :or_text_2   - second text as it was compared
      #   :succ_char   - matched fragments, in the order they were found
      #   :failed_char - non-empty leftovers: the unmatched remainder of the
      #                  second text first, then that of the first text
      attr_accessor :log_text

      # Resolves an argument to the text that should be compared.
      #
      # When +file+ names an existing regular file, the file content is
      # returned with surrounding whitespace stripped and single quotes
      # removed. Any other value is treated as literal text.
      #
      # @param file [String, #to_path, #to_s] a file path or literal text
      # @return [String] a fresh string that is safe to mutate
      def open(file)
        # Only ask the filesystem about values that can be a path, so plain
        # objects (numbers, nil, ...) fall through to +to_s+ instead of
        # raising TypeError inside File.file?.
        path_like = file.respond_to?(:to_str) || file.respond_to?(:to_path)
        return file.to_s.dup unless path_like && File.file?(file)

        # File.read closes the handle; File.open(...).read leaked it.
        File.read(file).strip.delete("'")
      end

      # Compares two texts (or the contents of two files) and reports how
      # much of the longer text is covered by fragments shared with the other.
      #
      # The first text is scanned left to right. At each position the longest
      # run that still occurs in the unmatched part of the second text is
      # taken. A run counts only when it is at least two characters long, or
      # one character when the shorter text is a single character. Matched
      # runs are removed from the second text so they cannot be counted twice.
      #
      # @param f0 [String, #to_path, #to_s] first text or path to a file
      # @param f1 [String, #to_path, #to_s] second text or path to a file
      # @return [String] percentage rounded to 3 decimals, e.g. "66.667%";
      #   two empty texts are reported as "100.0%"
      def txt_cmp(f0, f1)
        text0 = open(f0)
        text1 = open(f1)

        @log_text = {
          or_text_1: text0.dup,
          or_text_2: text1.dup,
          succ_char: [],
          failed_char: []
        }

        min_run = [text0.size, text1.size].min == 1 ? 1 : 2
        # Unmatched pieces of the second text. Keeping them separate (rather
        # than re-joining after each removal) prevents characters that were
        # never adjacent from forming a new, bogus match.
        segments = [text1.dup]
        leftover = []
        pos = 0

        while pos < text0.size
          run = longest_run(text0, pos, segments)
          if run.size >= min_run
            consume(segments, run)
            @log_text[:succ_char] << run
            pos += run.size
          else
            leftover << text0[pos]
            pos += 1
          end
        end

        @log_text[:failed_char] << segments.join << leftover.join
        @log_text[:failed_char].delete('')

        "#{similarity_percent(@log_text[:succ_char], text0, text1)}%"
      end

      private

      # Longest substring of +text+ starting at +pos+ that occurs inside one
      # of +segments+; returns an empty string when not even one character
      # matches.
      def longest_run(text, pos, segments)
        len = 0
        while pos + len < text.size &&
              segments.any? { |segment| segment.include?(text[pos, len + 1]) }
          len += 1
        end
        text[pos, len]
      end

      # Removes the first occurrence of +run+ from +segments+ by splitting the
      # segment that contains it into the parts before and after the match.
      def consume(segments, run)
        at = segments.index { |segment| segment.include?(run) }
        before, _match, after = segments[at].partition(run)
        segments[at, 1] = [before, after]
      end

      # Share of the longer text covered by the matched fragments, as a
      # percentage rounded to three decimals.
      def similarity_percent(matched, text0, text1)
        longest = [text0.size, text1.size].max
        # Avoid 0.0 / 0.0 (NaN): two empty texts are identical.
        return 100.0 if longest.zero?

        (matched.sum(&:size).to_f / longest * 100).round(3)
      end
    end
  end
end
59
+
60
+ # (puts "你必须输入两个参数(文件或者字符串)";exit) if ARGV.size != 2
61
+ # result = DoubleText::Analysis.txt_cmp(f0=ARGV[0],f1=ARGV[1])
62
+ # p({
63
+ # result: "#{f0} 与 #{f1} 匹配度 是 #{result}",
64
+ # data: DoubleText::Analysis.log_text
65
+ # })
66
+
67
+ # =============================>
68
+ # simple test
69
+ # result = DoubleText::Analysis.txt_cmp('aa','aa')
70
+ # p({
71
+ # result: " 匹配度 是 #{result}",
72
+ # data: DoubleText::Analysis.log_text
73
+ # })
74
+ #
75
+ # result = DoubleText::Analysis.txt_cmp('aaa','aa')
76
+ # p({
77
+ # result: " 匹配度 是 #{result}",
78
+ # data: DoubleText::Analysis.log_text
79
+ # })
80
+ #
81
+ # result = DoubleText::Analysis.txt_cmp('aa','aaa')
82
+ # p({
83
+ # result: " 匹配度 是 #{result}",
84
+ # data: DoubleText::Analysis.log_text
85
+ # })
86
+ #
87
+ # result = DoubleText::Analysis.txt_cmp('a','a')
88
+ # p({
89
+ # result: " 匹配度 是 #{result}",
90
+ # data: DoubleText::Analysis.log_text
91
+ # })
92
+ #
93
+ # result = DoubleText::Analysis.txt_cmp('aaaaaaa','aaaaaaa')
94
+ # p({
95
+ # result: " 匹配度 是 #{result}",
96
+ # data: DoubleText::Analysis.log_text
97
+ # })
98
+
99
+
100
+
101
+
metadata ADDED
@@ -0,0 +1,43 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cmp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - mico_xiaozhen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-04-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 比较两个文件中文字相似度,也可以直接比较2段文字
14
+ email: mico_xiaozhen@sina.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/cmp.rb
20
+ homepage: http://rubygems.org/mico_xiaozhen/cmp
21
+ licenses: []
22
+ metadata: {}
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
32
+ required_rubygems_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ requirements: []
38
+ rubyforge_project:
39
+ rubygems_version: 2.6.12
40
+ signing_key:
41
+ specification_version: 4
42
+ summary: 比较两个文件中文字相似度,也可以直接比较2段文字
43
+ test_files: []