gtfs_stops_clustering 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6a9a16f2fa8980f4bd7e84e9912e7925caa556fd2b2daab56209ff8351e93a19
|
4
|
+
data.tar.gz: 652b0895096c0b55009d669ea1b8416368dec5884b4a395ca4fd46c762836575
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3ea8cd7f921ff06aa5e838684d1efb8fcaa48fa3b2601e85ed6f926fecebc9d4bb8831399d5bd0e63cf1a2531231e5d8c71b4e56a3287c9ef0aa602b22726a7f
|
7
|
+
data.tar.gz: 87bea9a4711300f07b9b6a0151590de265e6c55447a9c1e6ad3b7503b1ffaa45ac5090e1b1859a4786765b1709826519f84876504faac815d7c85282ee1857ed
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# lib/gtfs_stops_clustering/data_import.rb
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'gtfs'
|
5
|
+
|
6
|
+
# Turns a list of GTFS stop records into the parallel arrays consumed by the
# clustering step, optionally renaming stops through a "corner cases" CSV.
module DataImport
  VERSION = '0.0.1'
  attr_accessor :data_import

  # Reads stops (objects responding to #lat, #lon and #name) and exposes:
  #   stops_names          - one label per stop (after corner-case renaming)
  #   stops_data           - one [latitude, longitude] pair per stop
  #   stops_redis_geodata  - one [longitude, latitude, "lon,lat"] triple per stop
  #   stops_corner_cases   - the { stop_name:, cluster_name: } rows read from the CSV
  class DataImport
    attr_accessor :stops, :stops_config_file, :stops_names, :stops_corner_cases, :stops_data, :stops_redis_geodata

    # stops             - enumerable of stop records
    # stops_config_file - path of a CSV with `stop_name` and `cluster_name`
    #                     headers; nil, '' or a missing file means "no renaming"
    def initialize(stops, stops_config_file)
      @stops = stops
      @stops_config_file = stops_config_file
      @stops_corner_cases = []
      @stops_names = []
      @stops_data = []
      @stops_redis_geodata = []
      import_stops_corner_cases
      import_stops_data
    end

    # Loads the renaming rules from the CSV, if it exists.
    def import_stops_corner_cases
      # A nil path used to raise TypeError inside File.exist?; treat it as absent.
      return unless @stops_config_file && File.exist?(@stops_config_file)

      CSV.foreach(@stops_config_file, headers: true) do |row|
        stops_corner_cases << { stop_name: row['stop_name'], cluster_name: row['cluster_name'] }
      end
    end

    # Fills the three parallel output arrays, one entry per stop.
    def import_stops_data
      overrides = corner_case_lookup

      @stops.each do |row|
        latitude = row.lat
        longitude = row.lon
        stop_name = row.name
        stop_name = overrides[stop_name] if overrides.key?(stop_name)

        @stops_names << stop_name
        @stops_data << [latitude, longitude]
        # The third element is the member name later stored in the Redis geo set.
        @stops_redis_geodata << [longitude, latitude, "#{longitude},#{latitude}"]
      end
    end

    private

    # Builds a stop_name => cluster_name hash so each stop needs one lookup
    # instead of two linear scans. When a stop_name appears several times the
    # first row wins, matching the previous `find` semantics.
    def corner_case_lookup
      @stops_corner_cases.each_with_object({}) do |entry, lookup|
        name = entry[:stop_name]
        lookup[name] = entry[:cluster_name] unless lookup.key?(name)
      end
    end
  end

  # Convenience entry point: builds a DataImport from +args+ (stored in
  # @data_import) and returns its three output arrays as a hash.
  def import_stops_data(*args)
    @data_import = DataImport.new(*args)
    {
      stops_data: @data_import.stops_data,
      stops_names: @data_import.stops_names,
      stops_redis_geodata: @data_import.stops_redis_geodata
    }
  end
end
|
59
|
+
|
60
|
+
include DataImport
|
@@ -0,0 +1,180 @@
|
|
1
|
+
## https://github.com/shiguodong/dbscan (fork)
|
2
|
+
|
3
|
+
require 'distance_measures'
|
4
|
+
require 'text'
|
5
|
+
require 'geocoder'
|
6
|
+
require_relative 'redis_geodata'
|
7
|
+
|
8
|
+
# Core-class extension: lets a coordinate array compute its distance to
# another coordinate array.
class Array
  # Returns whatever Geocoder::Calculations.distance_between yields for
  # (self, n).
  # NOTE(review): presumably both arrays are [latitude, longitude] and the
  # result is in Geocoder's configured unit - confirm against Geocoder docs.
  def haversine_distance2(n)
    Geocoder::Calculations.distance_between(self, n)
  end
end
|
13
|
+
|
14
|
+
module DBSCAN
|
15
|
+
# DBSCAN-style clusterer for stops. Two stops are neighbours when the geo
# index returns one for the other's position AND their labels are similar
# enough (normalized Levenshtein similarity strictly above
# options[:similarity]).
#
# Options read by this class: :epsilon (forwarded to the geo index),
# :min_points, :similarity and :labels (one label per point, same order).
# :distance is defaulted for compatibility but not used here.
class Clusterer
  attr_accessor :points, :options, :clusters

  # points              - array of coordinate pairs; items[0] is used as
  #                       latitude and items[1] as longitude
  # stops_redis_geodata - payload handed to redis_geodata_import
  # options             - see class comment; the caller's hash is not modified
  def initialize(points, stops_redis_geodata, options = {})
    options = options.dup # defaults below must not leak into the caller's hash
    options[:distance] ||= :euclidean_distance
    options[:labels] ||= []

    redis_geodata_import(stops_redis_geodata, options[:epsilon])
    @points = points.each_with_index.map { |coords, index| Point.new(coords, options[:labels][index]) }
    @options = options
    @clusters = { -1 => [] } # key -1 collects noise points

    clusterize!
  end

  # Visits every point once; a point with at least options[:min_points]
  # neighbours seeds a new cluster, any other point is filed as noise.
  def clusterize!
    current_cluster = -1
    @points.each do |point|
      next if point.visited?

      point.visit!
      neighbors = inmediate_neighbors(point)

      if neighbors.size >= options[:min_points]
        current_cluster += 1
        point.cluster = current_cluster
        members = [point].concat(add_connected(neighbors, current_cluster))
        clusters[current_cluster] = members
        label_cluster(members)
      else
        clusters[-1].push(point)
      end
    end
  end

  # Returns { cluster_index => [coordinates, ...] }, omitting empty clusters.
  def results
    @clusters.each_with_object({}) do |(cluster_index, members), hash|
      flat = members.flatten
      hash[cluster_index] = flat.map(&:items) unless flat.empty?
    end
  end

  # Returns { cluster_index => [stop hash, ...] }. stop_id, stop_code and
  # parent_station are left nil here.
  def labeled_results
    @clusters.each_with_object({}) do |(cluster_index, members), hash|
      hash[cluster_index] = members.map do |e|
        {
          stop_id: nil,
          stop_code: nil,
          cluster_name: e.cluster_name,
          cluster_pos: e.cluster_pos,
          stop_name: e.label,
          stop_lat: e.items[0],
          stop_lon: e.items[1],
          parent_station: nil
        }
      end
    end
  end

  # Returns the points that the geo index reports near +point+ and whose label
  # similarity with +point+ is strictly greater than options[:similarity].
  def inmediate_neighbors(point)
    geosearch(point.items[1], point.items[0]).each_with_object([]) do |member, neighbors|
      # Geo members are named "longitude,latitude".
      longitude, latitude = member.split(',')
      neighbor = @points.find do |candidate|
        candidate.items[0] == latitude && candidate.items[1] == longitude
      end
      next unless neighbor

      neighbors.push(neighbor) if label_similarity(point.label, neighbor.label) > options[:similarity]
    end
  end

  # Expands a cluster from its seed's neighbours. +neighbors+ grows while it
  # is being iterated: neighbours of dense points are appended so they get
  # processed too. Returns the points newly assigned to +current_cluster+.
  def add_connected(neighbors, current_cluster)
    cluster_points = []
    neighbors.each do |point|
      unless point.visited?
        point.visit!
        reachable = inmediate_neighbors(point)

        if reachable.size >= options[:min_points]
          reachable.each { |p| neighbors.push(p) unless neighbors.include?(p) }
        end
      end

      next if point.cluster

      # A point first filed as noise may turn out to be a border point of this
      # cluster; drop it from the noise list so it is not reported twice.
      clusters[-1].delete(point)
      cluster_points.push(point)
      point.cluster = current_cluster
    end

    cluster_points
  end

  private

  # Stamps every member with the cluster's shared name and position.
  def label_cluster(members)
    cluster_name = find_cluster_name(members.map { |e| e.label.capitalize })
    cluster_pos = find_cluster_position(members)

    members.each do |e|
      e.cluster_name = cluster_name
      e.cluster_pos = cluster_pos
    end
  end

  # Case-insensitive similarity: 1 - levenshtein / length of the LONGER label.
  # Normalizing by the longer of the two labels makes the measure symmetric.
  def label_similarity(first_label, second_label)
    first = first_label.downcase
    second = second_label.downcase
    longest = [first.length, second.length].max
    return 1.0 if longest.zero? # two empty labels are identical; avoids 0.0 / 0 => NaN

    1 - Text::Levenshtein.distance(first, second).to_f / longest
  end
end
|
131
|
+
|
132
|
+
# Derives a cluster name from its members' labels: the longest run of leading
# words shared by every label, each word capitalized and joined by a space.
# Falls back to the first label when no leading word is shared, and returns
# nil for an empty list (which previously raised NoMethodError).
def find_cluster_name(labels)
  return nil if labels.empty?

  word_lists = labels.map { |label| label.strip.split }
  common_words = []

  word_lists.first.each_with_index do |word, index|
    # Stop at the first position where any label differs or has run out of words.
    break unless word_lists.all? { |words| words[index] == word }

    common_words << word.capitalize
  end

  common_words.empty? ? labels.first : common_words.join(' ')
end
|
147
|
+
|
148
|
+
# Returns the centroid of a cluster as [average of items[0], average of
# items[1]], converting each coordinate with #to_f. Returns nil for an empty
# cluster (which previously raised ZeroDivisionError).
def find_cluster_position(cluster)
  return nil if cluster.empty?

  count = cluster.size
  avg_lat = cluster.sum { |e| e.items[0].to_f } / count
  avg_lon = cluster.sum { |e| e.items[1].to_f } / count
  [avg_lat, avg_lon]
end
|
155
|
+
|
156
|
+
# One stop inside the clusterer: its coordinates plus clustering bookkeeping.
class Point
  attr_accessor :items, :cluster, :visited, :label, :cluster_name, :cluster_pos

  # point - the coordinates, stored as given in #items
  # label - the stop's display name
  #
  # A new point is unvisited, belongs to no cluster, has no cluster name and
  # an empty cluster position.
  def initialize(point, label)
    @items = point
    @cluster = nil
    @visited = false
    @label = label
    @cluster_name = nil
    @cluster_pos = []
  end

  # True once #visit! has been called.
  def visited?
    @visited
  end

  # Marks the point as processed.
  def visit!
    @visited = true
  end
end
|
173
|
+
|
174
|
+
# Convenience entry point: builds a Clusterer from +args+ (clustering runs in
# its constructor) and returns its labeled_results hash.
def DBSCAN(*args)
  Clusterer.new(*args).labeled_results
end
|
178
|
+
end
|
179
|
+
|
180
|
+
include DBSCAN
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# lib/gtfs_stops_clustering/redis_geodata.rb
|
2
|
+
require 'redis'
|
3
|
+
|
4
|
+
# Stores stop positions in a Redis geo set and answers radius queries on it.
module RedisGeodata
  VERSION = '0.0.1'
  attr_accessor :redis

  # Owns the Redis connection and the geo set stored under #key.
  class RedisGeodata
    # Used when neither the redis_url: keyword nor ENV['REDIS_URL'] is given.
    DEFAULT_REDIS_URL = 'redis://127.0.0.1:6379'
    # Lifetime, in seconds, applied to the geo set after each import.
    KEY_TTL_SECONDS = 1_000_000

    attr_accessor :stops, :key, :redis, :epsilon

    # stops     - array of [longitude, latitude, member_name] triples
    # epsilon   - search radius passed to every #geosearch, in kilometres
    # redis_url - connection URL; defaults to ENV['REDIS_URL'], then to
    #             DEFAULT_REDIS_URL
    def initialize(stops, epsilon, redis_url: ENV.fetch('REDIS_URL', DEFAULT_REDIS_URL))
      @redis = Redis.new(url: redis_url)
      @stops = stops
      @key = 'stops'
      @epsilon = epsilon
      geoadd
    end

    # Adds every stop to the geo set and (re)sets the key's expiry.
    def geoadd
      # GEOADD without members is rejected by Redis; nothing to store anyway.
      return if Array(@stops).empty?

      @redis.geoadd(@key, *@stops)
      @redis.expire(@key, KEY_TTL_SECONDS)
    end

    # Returns the member names within @epsilon km of the given position,
    # excluding the member named exactly "longitude,latitude" (the queried
    # stop itself).
    def geosearch(longitude, latitude)
      own_member = "#{longitude},#{latitude}"
      @redis.georadius(@key, longitude, latitude, @epsilon, 'km').reject { |member| member == own_member }
    end
  end

  # Builds a RedisGeodata from the given arguments, keeps it in @redis for
  # later #geosearch calls, and returns it.
  def redis_geodata_import(*args, **options)
    @redis = RedisGeodata.new(*args, **options)
  end

  # Delegates to the RedisGeodata created by #redis_geodata_import.
  def geosearch(*args)
    @redis.geosearch(*args)
  end
end
|
39
|
+
|
40
|
+
include RedisGeodata
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# lib/gtfs_stops_clustering.rb
|
2
|
+
|
3
|
+
require 'gtfs'
|
4
|
+
require 'csv'
|
5
|
+
require_relative './gtfs_stops_clustering/data_import'
|
6
|
+
require_relative './gtfs_stops_clustering/dbscan'
|
7
|
+
|
8
|
+
# Entry point of the gem: reads the stops of one or more GTFS feeds, clusters
# them and writes a plain-text report.
module GtfsStopsClustering
  VERSION = '0.0.1'
  attr_accessor :gtfs_stops_clustering

  # Runs the whole pipeline from its constructor; the result is in #clusters.
  class GtfsStopsClustering
    # Report file, written relative to the current working directory.
    OUTPUT_PATH = 'stop_clusters.txt'

    attr_accessor :clusters, :gtfs_urls, :gtfs_stops, :stops_config_path, :epsilon, :min_points, :names_similarity

    # gtfs_urls         - one feed location or an array of them, each handed
    #                     to GTFS::Source.build
    # epsilon           - forwarded to DBSCAN as :epsilon
    # min_points        - forwarded to DBSCAN as :min_points
    # names_similarity  - forwarded to DBSCAN as :similarity
    # stops_config_path - forwarded to import_stops_data
    #
    # With no feeds nothing runs and #clusters stays [].
    def initialize(gtfs_urls, epsilon, min_points, names_similarity, stops_config_path)
      @clusters = []
      # Populate the ivar behind the gtfs_urls accessor (it used to stay nil).
      @gtfs_urls = Array(gtfs_urls)
      return if @gtfs_urls.empty?

      @gtfs_paths = @gtfs_urls
      @stops_config_path = stops_config_path
      @epsilon = epsilon
      @min_points = min_points
      @names_similarity = names_similarity
      @gtfs_stops = create_stops_merged
      clusterize_stops_csv(@gtfs_stops)
    end

    # Returns the stops of every feed as one flat array.
    def create_stops_merged
      @gtfs_paths.flat_map { |gtfs_path| GTFS::Source.build(gtfs_path).stops }
    end

    # Clusters +stops_merged+, copies GTFS ids onto the clustered stops and
    # writes the report file.
    def clusterize_stops_csv(stops_merged)
      data = import_stops_data(stops_merged, @stops_config_path)
      @clusters = DBSCAN(
        data[:stops_data],
        data[:stops_redis_geodata],
        epsilon: @epsilon,
        min_points: @min_points,
        similarity: @names_similarity,
        distance: :haversine_distance2,
        labels: data[:stops_names]
      )

      attach_gtfs_attributes
      write_clusters_report
    end

    private

    # Fills stop_id, stop_code and parent_station on every clustered stop from
    # the GTFS stop found at the same latitude/longitude.
    def attach_gtfs_attributes
      # One index instead of a linear search per stop; the first stop at a
      # given position wins, as with the previous `find`.
      stops_by_position = @gtfs_stops.each_with_object({}) do |gtfs_stop, index|
        index[[gtfs_stop.lat, gtfs_stop.lon]] ||= gtfs_stop
      end

      @clusters.each_value do |cluster|
        cluster.each do |stop|
          gtfs_stop = stops_by_position[[stop[:stop_lat], stop[:stop_lon]]]
          next unless gtfs_stop # leave the nil placeholders rather than crash

          stop[:stop_id] = gtfs_stop.id
          stop[:stop_code] = gtfs_stop.code
          stop[:parent_station] = gtfs_stop.parent_station
        end
      end
    end

    # Writes one "Cluster <id>" section per cluster, one inspected stop per
    # line, with a blank line after each section.
    def write_clusters_report
      File.open(OUTPUT_PATH, 'w') do |file|
        @clusters.each do |cluster_id, cluster|
          file.puts "Cluster #{cluster_id}"
          cluster.each { |point| file.puts point.inspect }
          file.puts
        end
      end
    end
  end

  # Convenience entry point: runs the pipeline (kept in
  # @gtfs_stops_clustering) and returns the clusters.
  # NOTE(review): similarity is compared with a strict `>` downstream, so the
  # default names_similarity of 1 looks like it can never match - confirm.
  def gtfs_stops_clusters(gtfs_urls, epsilon, min_points, names_similarity = 1, stop_config_path = '')
    @gtfs_stops_clustering = GtfsStopsClustering.new(gtfs_urls, epsilon, min_points, names_similarity, stop_config_path)
    @gtfs_stops_clustering.clusters
  end
end
|
68
|
+
|
69
|
+
include GtfsStopsClustering
|
metadata
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gtfs_stops_clustering
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Visco01
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-12-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: distance_measures
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.6
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: text
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 1.3.1
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '1.3'
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.3.1
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: geocoder
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '1.8'
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 1.8.2
|
57
|
+
type: :runtime
|
58
|
+
prerelease: false
|
59
|
+
version_requirements: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '1.8'
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 1.8.2
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: csv
|
69
|
+
requirement: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - "~>"
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '3.2'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 3.2.8
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '3.2'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 3.2.8
|
87
|
+
- !ruby/object:Gem::Dependency
|
88
|
+
name: redis
|
89
|
+
requirement: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '5.0'
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 5.0.8
|
97
|
+
type: :runtime
|
98
|
+
prerelease: false
|
99
|
+
version_requirements: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '5.0'
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: 5.0.8
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
name: gtfs
|
109
|
+
requirement: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - "~>"
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 0.4.1
|
114
|
+
type: :runtime
|
115
|
+
prerelease: false
|
116
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - "~>"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 0.4.1
|
121
|
+
description: A gem to read GTFS stops data and create clusters based on coordinates
|
122
|
+
and stop names' similarities.
|
123
|
+
email:
|
124
|
+
- visconti373@gmail.com
|
125
|
+
executables: []
|
126
|
+
extensions: []
|
127
|
+
extra_rdoc_files: []
|
128
|
+
files:
|
129
|
+
- lib/gtfs_stops_clustering.rb
|
130
|
+
- lib/gtfs_stops_clustering/data_import.rb
|
131
|
+
- lib/gtfs_stops_clustering/dbscan.rb
|
132
|
+
- lib/gtfs_stops_clustering/redis_geodata.rb
|
133
|
+
- lib/gtfs_stops_clustering/version.rb
|
134
|
+
homepage:
|
135
|
+
licenses:
|
136
|
+
- MIT
|
137
|
+
metadata: {}
|
138
|
+
post_install_message:
|
139
|
+
rdoc_options: []
|
140
|
+
require_paths:
|
141
|
+
- lib
|
142
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: 2.6.0
|
147
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ">="
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0'
|
152
|
+
requirements: []
|
153
|
+
rubygems_version: 3.4.10
|
154
|
+
signing_key:
|
155
|
+
specification_version: 4
|
156
|
+
summary: A gem to read GTFS stops data and create clusters based on coordinates and
|
157
|
+
stop names' similarities.
|
158
|
+
test_files: []
|