gtfs_stops_clustering 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd1d3d49ce47faac98cf22d674adab3d37d0afc1a0b7b55ecba36fe5cde3bac3
4
- data.tar.gz: 3b6572e53a4268c2e8def8dd80e8605768e425dd05570827822b80ce0448a100
3
+ metadata.gz: bf75afd13663d9ebf254d37a24763336b5f411c174f3e8b9743af32c0f3843e9
4
+ data.tar.gz: 16d82b24fbd8b2170917f87745b68874802f95a151ebe500bf51cba408edd7b1
5
5
  SHA512:
6
- metadata.gz: a7f35b9d4c35f638b5baac0fa384f50e44db4ef1a20cfc119c74c184b1d900c1207704c1503094fb85c63ea84c9acf283c4cb33e8e7efa7ae13aa5d18e5d5d67
7
- data.tar.gz: 6b5c325393774c7f926891445c09f82eda11fbdb1b5528942e31284f2c5c946ceada56aabe82911a8167956a933ffce1ec17fb599a71ace3853e24dcbec059a9
6
+ metadata.gz: 5efc4d7c36b869092e9147f5b350af0178cf968d08e9df1fe94705e7277f19b6d3bccdd91e94ba4c23735a827125263a2ac7dec5dee4768da6314fe49326ea6b
7
+ data.tar.gz: 01e7f5fa34685099706249c1f40e64f40986fce2fc64a78f8696d6be967a1bbafdc2c5706830d7b5b423d984df3b232e3fe96acc02507a10d5fbb2f23eaa6792
data/.rubocop.yml CHANGED
@@ -11,3 +11,12 @@ Style/StringLiteralsInInterpolation:
11
11
 
12
12
  Layout/LineLength:
13
13
  Max: 140
14
+
15
+ Style/FrozenStringLiteralComment:
16
+ Enabled: false
17
+
18
+ Metrics/MethodLength:
19
+ Max: 25
20
+
21
+ Metrics/AbcSize:
22
+ Max: 20
@@ -32,10 +32,6 @@ Gem::Specification.new do |spec|
32
32
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
33
33
  spec.require_paths = ["lib"]
34
34
 
35
- # spec.files = ["lib/gtfs_stops_clustering.rb", "lib/gtfs_stops_clustering/data_import.rb", "lib/gtfs_stops_clustering/dbscan.rb",
36
- # "lib/gtfs_stops_clustering/redis_geodata.rb", "lib/gtfs_stops_clustering/version.rb",
37
- # "lib/gtfs_stops_clustering/input_consistency_checks.rb"]
38
-
39
35
  spec.add_runtime_dependency "csv", "~> 3.2", ">= 3.2.8"
40
36
  spec.add_runtime_dependency "distance_measures", "~> 0.0.6"
41
37
  spec.add_runtime_dependency "geocoder", "~> 1.8", ">= 1.8.2"
@@ -4,6 +4,7 @@ require "distance_measures"
4
4
  require "text"
5
5
  require "geocoder"
6
6
  require_relative "redis_geodata"
7
+ require_relative "utils"
7
8
 
8
9
  # Array class
9
10
  class Array
@@ -50,31 +51,26 @@ module DBSCAN
50
51
 
51
52
  if neighbors.size >= options[:min_points]
52
53
  current_cluster += 1
53
- point.cluster = current_cluster
54
- cluster = [point].push(add_connected(neighbors, current_cluster))
55
- clusters[current_cluster] = cluster.flatten
56
-
57
- # Get Cluster Name
58
- labels = clusters[current_cluster].map { |e| e.label.capitalize }
59
- cluster_name = find_cluster_name(labels)
60
-
61
- # Get Cluster Position
62
- cluster_pos = find_cluster_position(clusters[current_cluster])
63
-
64
- clusters[current_cluster].each do |e|
65
- e.cluster_name = cluster_name
66
- e.cluster_pos = cluster_pos
67
- end
54
+ create_cluster(current_cluster, point, neighbors)
55
+ update_cluster_info(current_cluster)
68
56
  else
69
57
  clusters[-1].push(point)
70
58
  end
71
59
  end
72
60
  end
73
61
 
74
- def results
75
- hash = {}
76
- @clusters.dup.each { |cluster_index, value| hash[cluster_index] = value.flatten.map(&:items) unless value.flatten.empty? }
77
- hash
62
+ def create_cluster(cluster_index, point, neighbors)
63
+ point.cluster = cluster_index
64
+ cluster = [point].push(add_connected(neighbors, cluster_index))
65
+ @clusters[cluster_index] = cluster.flatten
66
+ end
67
+
68
+ def update_cluster_info(cluster_index)
69
+ labels = @clusters[cluster_index].map { |e| e.label.capitalize }
70
+ @clusters[cluster_index].each do |e|
71
+ e.cluster_name = Utils.find_cluster_name(labels)
72
+ e.cluster_pos = Utils.find_cluster_position(clusters[cluster_index])
73
+ end
78
74
  end
79
75
 
80
76
  def labeled_results
@@ -103,16 +99,10 @@ module DBSCAN
103
99
  neighbors = []
104
100
  geosearch_results = geosearch(point.items[1], point.items[0])
105
101
  geosearch_results.each do |neighbor_pos|
106
- coordinates = neighbor_pos.split(",")
107
- neighbor = @points.find do |elem|
108
- elem.items[0] == coordinates[1] &&
109
- elem.items[1] == coordinates[0]
110
- end
102
+ neighbor = Utils.find_inmediate_neighbor(neighbor_pos, @points)
111
103
  next unless neighbor
112
104
 
113
- string_distance = Text::Levenshtein.distance(point.label.downcase, neighbor.label.downcase)
114
- similarity = 1 - string_distance.to_f / [point.label.length, point.label.length].max
115
- neighbors.push(neighbor) if similarity > options[:similarity]
105
+ neighbors.push(neighbor) if Utils.string_similarity(point.label.downcase, neighbor.label.downcase) > options[:similarity]
116
106
  end
117
107
  neighbors
118
108
  end
@@ -139,30 +129,8 @@ module DBSCAN
139
129
 
140
130
  cluster_points
141
131
  end
142
-
143
- def find_cluster_name(labels)
144
- words = labels.map { |label| label.strip.split }
145
- common_title = ""
146
-
147
- # Loop through each word index starting from the first
148
- (0...words.first.length).each do |i|
149
- words_at_index = words.map { |word_list| word_list[i] }
150
-
151
- break unless words_at_index.uniq.length == 1
152
-
153
- common_title += " #{words_at_index.first.capitalize}"
154
- end
155
-
156
- common_title.strip! ? common_title : labels.first
157
- end
158
- def find_cluster_position(cluster)
159
- total_lat = cluster.map { |e| e.items[0].to_f }.sum
160
- total_lon = cluster.map { |e| e.items[1].to_f }.sum
161
- avg_lat = total_lat / cluster.size
162
- avg_lon = total_lon / cluster.size
163
- [avg_lat, avg_lon]
164
- end
165
132
  end
133
+
166
134
  # Point class
167
135
  class Point
168
136
  attr_accessor :items, :cluster, :visited, :label, :cluster_name, :cluster_pos
@@ -182,7 +150,7 @@ module DBSCAN
182
150
  end
183
151
  end
184
152
 
185
- def DBSCAN(* args)
153
+ def dbscan(* args)
186
154
  clusterer = Clusterer.new(*args)
187
155
  clusterer.labeled_results
188
156
  end
@@ -0,0 +1,41 @@
1
+ # lib/utils.rb
2
+
3
+ # Utils class
4
+ class Utils
5
+ def self.find_cluster_name(labels)
6
+ words = labels.map { |label| label.strip.split }
7
+ common_title = ""
8
+
9
+ # Loop through each word index starting from the first
10
+ (0...words.first.length).each do |i|
11
+ words_at_index = words.map { |word_list| word_list[i] }
12
+
13
+ break unless words_at_index.uniq.length == 1
14
+
15
+ common_title += " #{words_at_index.first.capitalize}"
16
+ end
17
+
18
+ common_title.strip! ? common_title : labels.first
19
+ end
20
+
21
+ def self.find_cluster_position(cluster)
22
+ total_lat = cluster.map { |e| e.items[0].to_f }.sum
23
+ total_lon = cluster.map { |e| e.items[1].to_f }.sum
24
+ avg_lat = total_lat / cluster.size
25
+ avg_lon = total_lon / cluster.size
26
+ [avg_lat, avg_lon]
27
+ end
28
+
29
+ def self.string_similarity(str1, str2)
30
+ string_distance = Text::Levenshtein.distance(str1.downcase, str2.downcase)
31
+ 1 - string_distance.to_f / [str1.length, str2.length].max
32
+ end
33
+
34
+ def self.find_inmediate_neighbor(neighbor_pos, points)
35
+ coordinates_split = neighbor_pos.split(",")
36
+ points.find do |elem|
37
+ elem.items[0] == coordinates_split[1] &&
38
+ elem.items[1] == coordinates_split[0]
39
+ end
40
+ end
41
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsStopsClustering
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
@@ -37,19 +37,17 @@ module GtfsStopsClustering
37
37
  def create_stops_merged
38
38
  gtfs_stops = []
39
39
  @gtfs_paths.each do |gtfs_path|
40
- begin
41
- gtfs = GTFS::Source.build(gtfs_path)
42
- gtfs_stops << gtfs.stops
43
- rescue GTFS::InvalidSourceException => e
44
- raise IOError "Error occurred while building GTFS from #{gtfs_path}: #{e.message}"
45
- end
40
+ gtfs = GTFS::Source.build(gtfs_path)
41
+ gtfs_stops << gtfs.stops
42
+ rescue GTFS::InvalidSourceException => e
43
+ raise IOError "Error occurred while building GTFS from #{gtfs_path}: #{e.message}"
46
44
  end
47
45
  gtfs_stops.flatten
48
46
  end
49
47
 
50
48
  def clusterize_stops
51
49
  data = import_stops_data(@gtfs_stops, @stops_config_path)
52
- @clusters = DBSCAN(data[:stops_data],
50
+ @clusters = dbscan(data[:stops_data],
53
51
  data[:stops_redis_geodata],
54
52
  epsilon: @epsilon,
55
53
  min_points: @min_points,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_stops_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pietro Visconti
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-08 00:00:00.000000000 Z
11
+ date: 2023-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -166,6 +166,7 @@ files:
166
166
  - lib/gtfs_stops_clustering/dbscan.rb
167
167
  - lib/gtfs_stops_clustering/input_consistency_checks.rb
168
168
  - lib/gtfs_stops_clustering/redis_geodata.rb
169
+ - lib/gtfs_stops_clustering/utils.rb
169
170
  - lib/gtfs_stops_clustering/version.rb
170
171
  - lib/stops_corner_cases.txt
171
172
  - sig/gtfs_stops_clustering.rbs