gtfs_stops_clustering 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/gtfs_stops_clustering.gemspec +0 -4
- data/lib/gtfs_stops_clustering/dbscan.rb +19 -51
- data/lib/gtfs_stops_clustering/utils.rb +41 -0
- data/lib/gtfs_stops_clustering/version.rb +1 -1
- data/lib/gtfs_stops_clustering.rb +5 -7
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf75afd13663d9ebf254d37a24763336b5f411c174f3e8b9743af32c0f3843e9
|
4
|
+
data.tar.gz: 16d82b24fbd8b2170917f87745b68874802f95a151ebe500bf51cba408edd7b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5efc4d7c36b869092e9147f5b350af0178cf968d08e9df1fe94705e7277f19b6d3bccdd91e94ba4c23735a827125263a2ac7dec5dee4768da6314fe49326ea6b
|
7
|
+
data.tar.gz: 01e7f5fa34685099706249c1f40e64f40986fce2fc64a78f8696d6be967a1bbafdc2c5706830d7b5b423d984df3b232e3fe96acc02507a10d5fbb2f23eaa6792
|
data/.rubocop.yml
CHANGED
data/gtfs_stops_clustering.gemspec
CHANGED
@@ -32,10 +32,6 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
-
# spec.files = ["lib/gtfs_stops_clustering.rb", "lib/gtfs_stops_clustering/data_import.rb", "lib/gtfs_stops_clustering/dbscan.rb",
|
36
|
-
# "lib/gtfs_stops_clustering/redis_geodata.rb", "lib/gtfs_stops_clustering/version.rb",
|
37
|
-
# "lib/gtfs_stops_clustering/input_consistency_checks.rb"]
|
38
|
-
|
39
35
|
spec.add_runtime_dependency "csv", "~> 3.2", ">= 3.2.8"
|
40
36
|
spec.add_runtime_dependency "distance_measures", "~> 0.0.6"
|
41
37
|
spec.add_runtime_dependency "geocoder", "~> 1.8", ">= 1.8.2"
|
data/lib/gtfs_stops_clustering/dbscan.rb
CHANGED
@@ -4,6 +4,7 @@ require "distance_measures"
|
|
4
4
|
require "text"
|
5
5
|
require "geocoder"
|
6
6
|
require_relative "redis_geodata"
|
7
|
+
require_relative "utils"
|
7
8
|
|
8
9
|
# Array class
|
9
10
|
class Array
|
@@ -50,31 +51,26 @@ module DBSCAN
|
|
50
51
|
|
51
52
|
if neighbors.size >= options[:min_points]
|
52
53
|
current_cluster += 1
|
53
|
-
point
|
54
|
-
|
55
|
-
clusters[current_cluster] = cluster.flatten
|
56
|
-
|
57
|
-
# Get Cluster Name
|
58
|
-
labels = clusters[current_cluster].map { |e| e.label.capitalize }
|
59
|
-
cluster_name = find_cluster_name(labels)
|
60
|
-
|
61
|
-
# Get Cluster Position
|
62
|
-
cluster_pos = find_cluster_position(clusters[current_cluster])
|
63
|
-
|
64
|
-
clusters[current_cluster].each do |e|
|
65
|
-
e.cluster_name = cluster_name
|
66
|
-
e.cluster_pos = cluster_pos
|
67
|
-
end
|
54
|
+
create_cluster(current_cluster, point, neighbors)
|
55
|
+
update_cluster_info(current_cluster)
|
68
56
|
else
|
69
57
|
clusters[-1].push(point)
|
70
58
|
end
|
71
59
|
end
|
72
60
|
end
|
73
61
|
|
74
|
-
def
|
75
|
-
|
76
|
-
|
77
|
-
|
62
|
+
def create_cluster(cluster_index, point, neighbors)
|
63
|
+
point.cluster = cluster_index
|
64
|
+
cluster = [point].push(add_connected(neighbors, cluster_index))
|
65
|
+
@clusters[cluster_index] = cluster.flatten
|
66
|
+
end
|
67
|
+
|
68
|
+
def update_cluster_info(cluster_index)
|
69
|
+
labels = @clusters[cluster_index].map { |e| e.label.capitalize }
|
70
|
+
@clusters[cluster_index].each do |e|
|
71
|
+
e.cluster_name = Utils.find_cluster_name(labels)
|
72
|
+
e.cluster_pos = Utils.find_cluster_position(clusters[cluster_index])
|
73
|
+
end
|
78
74
|
end
|
79
75
|
|
80
76
|
def labeled_results
|
@@ -103,16 +99,10 @@ module DBSCAN
|
|
103
99
|
neighbors = []
|
104
100
|
geosearch_results = geosearch(point.items[1], point.items[0])
|
105
101
|
geosearch_results.each do |neighbor_pos|
|
106
|
-
|
107
|
-
neighbor = @points.find do |elem|
|
108
|
-
elem.items[0] == coordinates[1] &&
|
109
|
-
elem.items[1] == coordinates[0]
|
110
|
-
end
|
102
|
+
neighbor = Utils.find_inmediate_neighbor(neighbor_pos, @points)
|
111
103
|
next unless neighbor
|
112
104
|
|
113
|
-
|
114
|
-
similarity = 1 - string_distance.to_f / [point.label.length, point.label.length].max
|
115
|
-
neighbors.push(neighbor) if similarity > options[:similarity]
|
105
|
+
neighbors.push(neighbor) if Utils.string_similarity(point.label.downcase, neighbor.label.downcase) > options[:similarity]
|
116
106
|
end
|
117
107
|
neighbors
|
118
108
|
end
|
@@ -139,30 +129,8 @@ module DBSCAN
|
|
139
129
|
|
140
130
|
cluster_points
|
141
131
|
end
|
142
|
-
|
143
|
-
def find_cluster_name(labels)
|
144
|
-
words = labels.map { |label| label.strip.split }
|
145
|
-
common_title = ""
|
146
|
-
|
147
|
-
# Loop through each word index starting from the first
|
148
|
-
(0...words.first.length).each do |i|
|
149
|
-
words_at_index = words.map { |word_list| word_list[i] }
|
150
|
-
|
151
|
-
break unless words_at_index.uniq.length == 1
|
152
|
-
|
153
|
-
common_title += " #{words_at_index.first.capitalize}"
|
154
|
-
end
|
155
|
-
|
156
|
-
common_title.strip! ? common_title : labels.first
|
157
|
-
end
|
158
|
-
def find_cluster_position(cluster)
|
159
|
-
total_lat = cluster.map { |e| e.items[0].to_f }.sum
|
160
|
-
total_lon = cluster.map { |e| e.items[1].to_f }.sum
|
161
|
-
avg_lat = total_lat / cluster.size
|
162
|
-
avg_lon = total_lon / cluster.size
|
163
|
-
[avg_lat, avg_lon]
|
164
|
-
end
|
165
132
|
end
|
133
|
+
|
166
134
|
# Point class
|
167
135
|
class Point
|
168
136
|
attr_accessor :items, :cluster, :visited, :label, :cluster_name, :cluster_pos
|
@@ -182,7 +150,7 @@ module DBSCAN
|
|
182
150
|
end
|
183
151
|
end
|
184
152
|
|
185
|
-
def
|
153
|
+
def dbscan(* args)
|
186
154
|
clusterer = Clusterer.new(*args)
|
187
155
|
clusterer.labeled_results
|
188
156
|
end
|
data/lib/gtfs_stops_clustering/utils.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# lib/gtfs_stops_clustering/utils.rb
|
2
|
+
|
3
|
+
# Utils class
|
4
|
+
class Utils
|
5
|
+
def self.find_cluster_name(labels)
|
6
|
+
words = labels.map { |label| label.strip.split }
|
7
|
+
common_title = ""
|
8
|
+
|
9
|
+
# Loop through each word index starting from the first
|
10
|
+
(0...words.first.length).each do |i|
|
11
|
+
words_at_index = words.map { |word_list| word_list[i] }
|
12
|
+
|
13
|
+
break unless words_at_index.uniq.length == 1
|
14
|
+
|
15
|
+
common_title += " #{words_at_index.first.capitalize}"
|
16
|
+
end
|
17
|
+
|
18
|
+
common_title.strip! ? common_title : labels.first
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.find_cluster_position(cluster)
|
22
|
+
total_lat = cluster.map { |e| e.items[0].to_f }.sum
|
23
|
+
total_lon = cluster.map { |e| e.items[1].to_f }.sum
|
24
|
+
avg_lat = total_lat / cluster.size
|
25
|
+
avg_lon = total_lon / cluster.size
|
26
|
+
[avg_lat, avg_lon]
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.string_similarity(str1, str2)
|
30
|
+
string_distance = Text::Levenshtein.distance(str1.downcase, str2.downcase)
|
31
|
+
1 - string_distance.to_f / [str1.length, str2.length].max
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.find_inmediate_neighbor(neighbor_pos, points)
|
35
|
+
coordinates_split = neighbor_pos.split(",")
|
36
|
+
points.find do |elem|
|
37
|
+
elem.items[0] == coordinates_split[1] &&
|
38
|
+
elem.items[1] == coordinates_split[0]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/gtfs_stops_clustering.rb
CHANGED
@@ -37,19 +37,17 @@ module GtfsStopsClustering
|
|
37
37
|
def create_stops_merged
|
38
38
|
gtfs_stops = []
|
39
39
|
@gtfs_paths.each do |gtfs_path|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
raise IOError "Error occurred while building GTFS from #{gtfs_path}: #{e.message}"
|
45
|
-
end
|
40
|
+
gtfs = GTFS::Source.build(gtfs_path)
|
41
|
+
gtfs_stops << gtfs.stops
|
42
|
+
rescue GTFS::InvalidSourceException => e
|
43
|
+
raise IOError "Error occurred while building GTFS from #{gtfs_path}: #{e.message}"
|
46
44
|
end
|
47
45
|
gtfs_stops.flatten
|
48
46
|
end
|
49
47
|
|
50
48
|
def clusterize_stops
|
51
49
|
data = import_stops_data(@gtfs_stops, @stops_config_path)
|
52
|
-
@clusters =
|
50
|
+
@clusters = dbscan(data[:stops_data],
|
53
51
|
data[:stops_redis_geodata],
|
54
52
|
epsilon: @epsilon,
|
55
53
|
min_points: @min_points,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gtfs_stops_clustering
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pietro Visconti
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-
|
11
|
+
date: 2023-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/gtfs_stops_clustering/dbscan.rb
|
167
167
|
- lib/gtfs_stops_clustering/input_consistency_checks.rb
|
168
168
|
- lib/gtfs_stops_clustering/redis_geodata.rb
|
169
|
+
- lib/gtfs_stops_clustering/utils.rb
|
169
170
|
- lib/gtfs_stops_clustering/version.rb
|
170
171
|
- lib/stops_corner_cases.txt
|
171
172
|
- sig/gtfs_stops_clustering.rbs
|