nettracer3d 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -724,32 +724,29 @@ def get_degrees(nodes, network, down_factor = None, directory = None, centroids
 
 
 def remove_dupes(network):
-    """Removes duplicate node connections from network"""
+    """Remove duplicate node connections using numpy arrays"""
     if type(network) == str:
         network = read_excel_to_lists(network)
-
-    compare_list = []
-
-    nodesA = network[0]
-    nodesB = network[1]
-    edgesC = network[2]
-
-    # Iterate in reverse order to safely delete elements
-    for i in range(len(nodesA) - 1, -1, -1):
-        item = [nodesA[i], nodesB[i]]
-        reverse = [nodesB[i], nodesA[i]]
-        if item in compare_list or reverse in compare_list:
-            del nodesA[i]
-            del nodesB[i]
-            del edgesC[i]
-            continue
-        else:
-            compare_list.append(item)
-
-    master_list = [nodesA, nodesB, edgesC]
-
-    return master_list
-
+
+    nodesA = np.array(network[0])
+    nodesB = np.array(network[1])
+    edgesC = np.array(network[2])
+
+    # Create normalized edges (smaller node first)
+    edges = np.column_stack([np.minimum(nodesA, nodesB), np.maximum(nodesA, nodesB)])
+
+    # Find unique edges and their indices
+    _, unique_indices = np.unique(edges, axis=0, return_index=True)
+
+    # Sort indices to maintain original order
+    unique_indices = np.sort(unique_indices)
+
+    # Extract unique connections
+    filtered_nodesA = nodesA[unique_indices].tolist()
+    filtered_nodesB = nodesB[unique_indices].tolist()
+    filtered_edgesC = edgesC[unique_indices].tolist()
+
+    return [filtered_nodesA, filtered_nodesB, filtered_edgesC]
 
 
 
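Note on the remove_dupes rewrite: the old version rescanned a growing compare_list for every row, which is quadratic in the number of connections; the new one normalizes each pair so the smaller ID comes first, making (A, B) and (B, A) identical rows that np.unique(axis=0) collapses in a single sorted pass. A minimal standalone sketch of the technique with toy values (it assumes numeric node IDs, since np.minimum/np.maximum need an orderable dtype):

    import numpy as np

    # Toy edge list containing a reversed duplicate: (1, 2) and (2, 1).
    nodesA = np.array([1, 2, 3, 1])
    nodesB = np.array([2, 1, 4, 5])
    edgesC = np.array([10, 11, 12, 13])

    # Normalize so the smaller node always sits in the first column.
    edges = np.column_stack([np.minimum(nodesA, nodesB), np.maximum(nodesA, nodesB)])

    # return_index yields the first occurrence of each unique row;
    # sorting those indices preserves the original row order.
    _, unique_indices = np.unique(edges, axis=0, return_index=True)
    unique_indices = np.sort(unique_indices)

    print(nodesA[unique_indices].tolist())  # [1, 3, 1]
    print(nodesB[unique_indices].tolist())  # [2, 4, 5]
    print(edgesC[unique_indices].tolist())  # [10, 12, 13]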
@@ -878,210 +875,193 @@ def get_distance_list(centroids, network, xy_scale, z_scale):
 
 
 def prune_samenode_connections(networkfile, nodeIDs):
-    """Remove all connections between nodes of the same ID, to evaluate solely connections to other objects"""
-
+    """Remove all connections between nodes of the same ID, vectorized with numpy for large datasets"""
+    import numpy as np
+
+    # Handle nodeIDs input
     if type(nodeIDs) == str:
-        # Read the Excel file into a DataFrame
         df = pd.read_excel(nodeIDs)
-
-        # Convert the DataFrame to a dictionary
         data_dict = pd.Series(df.iloc[:, 1].values, index=df.iloc[:, 0]).to_dict()
     else:
         data_dict = nodeIDs
-
+
+    # Handle networkfile input
     if type(networkfile) == str:
-        # Read the network file into lists
         master_list = read_excel_to_lists(networkfile)
     else:
         master_list = networkfile
-
-    nodesA = master_list[0]
-    nodesB = master_list[1]
-    edgesC = master_list[2]
-
-    # Iterate in reverse order to safely delete elements
-    for i in range(len(nodesA) - 1, -1, -1):
-        nodeA = nodesA[i]
-        nodeB = nodesB[i]
-
-        if data_dict.get(nodeA) == data_dict.get(nodeB):
-            # Remove the item from all lists
-            del nodesA[i]
-            del nodesB[i]
-            try:
-                del edgesC[i]
-            except:
-                pass
-
-    save_list = []
-
-    for i in range(len(nodesA)):
-        try:
-            item = [nodesA[i], nodesB[i], edgesC[i]]
-        except:
-            item = [nodesA[i], nodesB[i], None]
-        save_list.append(item)
-
+
+    nodesA = np.array(master_list[0])
+    nodesB = np.array(master_list[1])
+
+    # Handle edgesC safely
+    if len(master_list) > 2 and master_list[2]:
+        edgesC = np.array(master_list[2], dtype=object)
+    else:
+        edgesC = np.array([None] * len(nodesA), dtype=object)
+
+    # Vectorized lookup of node IDs
+    idsA = np.array([data_dict.get(node) for node in nodesA])
+    idsB = np.array([data_dict.get(node) for node in nodesB])
+
+    # Create boolean mask - keep where IDs are different
+    keep_mask = idsA != idsB
+
+    # Apply filter
+    filtered_nodesA = nodesA[keep_mask].tolist()
+    filtered_nodesB = nodesB[keep_mask].tolist()
+    filtered_edgesC = edgesC[keep_mask].tolist()
+
+    # Create save_list
+    save_list = [[filtered_nodesA[i], filtered_nodesB[i], filtered_edgesC[i]]
+                 for i in range(len(filtered_nodesA))]
+
+    # Handle file saving
     if type(networkfile) == str:
-
         filename = 'network_pruned_away_samenode_connections.xlsx'
-
         nettracer.create_and_save_dataframe(save_list, filename)
-
         print(f"Pruned network saved to {filename}")
-
-    output_dict = data_dict.copy()
-    for item in data_dict:
-        if item not in nodesA and item not in nodesB:
-            del output_dict[item]
-
-    filename = 'Node_identities_pruned_away_samenode_connections.xlsx'
-
+
+    # Create output_dict
+    nodes_in_filtered = set(filtered_nodesA + filtered_nodesB)
+    output_dict = {node: data_dict[node] for node in nodes_in_filtered
+                   if node in data_dict}
+
+    # Handle identity file saving
     if type(networkfile) == str:
-
+        filename = 'Node_identities_pruned_away_samenode_connections.xlsx'
         save_singval_dict(output_dict, 'NodeID', 'Identity', filename)
-
         print(f"Pruned network identities saved to {filename}")
-
-    master_list = [nodesA, nodesB, edgesC]
-
-
-    # Optional: Return the updated lists if needed
+
+    master_list = [filtered_nodesA, filtered_nodesB, filtered_edgesC]
     return master_list, output_dict
 
 
 def isolate_internode_connections(networkfile, nodeIDs, ID1, ID2):
-    """Isolate only connections between two specific node identified elements of a network"""
-
+    """Isolate only the connections between two specific node identities, vectorized with numpy for large datasets"""
+    import numpy as np
+
+    # Handle nodeIDs input
     if type(nodeIDs) == str:
-        """Remove all connections between nodes of the same ID, to evaluate solely connections to other objects"""
-        # Read the Excel file into a DataFrame
         df = pd.read_excel(nodeIDs)
-
-        # Convert the DataFrame to a dictionary
         data_dict = pd.Series(df.iloc[:, 1].values, index=df.iloc[:, 0]).to_dict()
     else:
         data_dict = nodeIDs
-
+
+    # Handle networkfile input
     if type(networkfile) == str:
-        # Read the network file into lists
         master_list = read_excel_to_lists(networkfile)
     else:
         master_list = networkfile
-
-    nodesA = master_list[0]
-    nodesB = master_list[1]
-    edgesC = master_list[2]
-
-    legalIDs = [ID1, ID2]
-
-    for i in range(len(nodesA) - 1, -1, -1):
-        nodeA = nodesA[i]
-        nodeB = nodesB[i]
-
-        valueA = str(data_dict.get(nodeA))
-        valueB = str(data_dict.get(nodeB))
-
-        # Check if both values are not in legalIDs
-        if valueA not in legalIDs or valueB not in legalIDs:
-            # Remove the item from all lists
-            del nodesA[i]
-            del nodesB[i]
-            del edgesC[i]
-
-    save_list = []
-
-    for i in range(len(nodesA)):
-        item = [nodesA[i], nodesB[i], edgesC[i]]
-        save_list.append(item)
-
-
+
+    nodesA = np.array(master_list[0])
+    nodesB = np.array(master_list[1])
+    edgesC = np.array(master_list[2])
+
+    # Vectorized lookup of node values
+    valuesA = np.array([str(data_dict.get(node, '')) for node in nodesA])
+    valuesB = np.array([str(data_dict.get(node, '')) for node in nodesB])
+
+    # Create boolean mask for filtering
+    legalIDs_set = {str(ID1), str(ID2)}
+    maskA = np.array([val in legalIDs_set for val in valuesA])
+    maskB = np.array([val in legalIDs_set for val in valuesB])
+    keep_mask = maskA & maskB
+
+    # Apply filter
+    filtered_nodesA = nodesA[keep_mask].tolist()
+    filtered_nodesB = nodesB[keep_mask].tolist()
+    filtered_edgesC = edgesC[keep_mask].tolist()
+
+    # Create save_list
+    save_list = [[filtered_nodesA[i], filtered_nodesB[i], filtered_edgesC[i]]
+                 for i in range(len(filtered_nodesA))]
+
+    # Handle file saving
     if type(networkfile) == str:
         filename = f'network_isolated_{ID1}_{ID2}_connections.xlsx'
-
         nettracer.create_and_save_dataframe(save_list, filename)
-
         print(f"Isolated internode network saved to {filename}")
-
-    output_dict = data_dict.copy()
-    for item in data_dict:
-        if item not in nodesA and item not in nodesB:
-            del output_dict[item]
-
+
+    # Create output_dict
+    nodes_in_filtered = set(filtered_nodesA + filtered_nodesB)
+    output_dict = {node: data_dict[node] for node in nodes_in_filtered
+                   if node in data_dict}
+
+    # Handle identity file saving
     if type(networkfile) == str:
-
         filename = f'Node_identities_for_isolated_{ID1}_{ID2}_network.xlsx'
-
         save_singval_dict(output_dict, 'NodeID', 'Identity', filename)
-
         print(f"Isolated network identities saved to {filename}")
-
-    master_list = [nodesA, nodesB, edgesC]
-
-    # Optional: Return the updated lists if needed
+
+    master_list = [filtered_nodesA, filtered_nodesB, filtered_edgesC]
    return master_list, output_dict
 
-def edge_to_node(network, node_identities = None, maxnode = None):
-    """Converts edge IDs into nodes, so that the node-edge relationships can be more easily visualized"""
-
+def edge_to_node(network, node_identities=None, maxnode=None):
+    """Convert edge IDs into nodes so node-edge relationships can be visualized, vectorized with numpy for large datasets"""
+    import numpy as np
+
+    # Handle node_identities input
     if node_identities is not None and type(node_identities) == str:
-        # Read the Excel file into a DataFrame
         df = pd.read_excel(node_identities)
-
-        # Convert the DataFrame to a dictionary
         identity_dict = pd.Series(df.iloc[:, 1].values, index=df.iloc[:, 0]).to_dict()
     elif node_identities is not None and type(node_identities) != str:
-        identity_dict = node_identities
+        identity_dict = node_identities.copy()
     else:
         identity_dict = {}
-
-    new_network = []
-
-    # Read the network file into lists
+
+    # Handle network input
     if type(network) == str:
         master_list = read_excel_to_lists(network)
     else:
         master_list = network
-
-    nodesA = master_list[0]
-    nodesB = master_list[1]
-    edgesC = master_list[2]
-    allnodes = set(nodesA + nodesB)
+
+    # Convert to numpy arrays for vectorized operations
+    nodesA = np.array(master_list[0])
+    nodesB = np.array(master_list[1])
+    edgesC = np.array(master_list[2])
+
+    # Get all unique nodes efficiently
+    allnodes = set(np.concatenate([nodesA, nodesB]).tolist())
+
+    # Calculate maxnode if not provided
     if maxnode is None:
-        maxnode = max(allnodes)
+        maxnode = int(np.max(np.concatenate([nodesA, nodesB])))
+
     print(f"Transposing all edge vals by {maxnode} to prevent ID overlap with preexisting nodes")
-
-
-    for i in range(len(edgesC)):
-        edgesC[i] = edgesC[i] + maxnode
-
-    alledges = set(edgesC)
-
-    for i in range(len(edgesC)):
-        newpair1 = [nodesA[i], edgesC[i], 0]
-        newpair2 = [edgesC[i], nodesB[i], 0]
-        new_network.append(newpair1)
-        new_network.append(newpair2)
-
-    for item in allnodes:
-        if item not in identity_dict:
-            identity_dict[item] = 'Node'
-
-    for item in alledges:
-        identity_dict[item] = 'Edge'
-
+
+    # Vectorized edge transposition
+    transposed_edges = edgesC + maxnode
+
+    # Create new_network using vectorized operations
+    # Create arrays for the two types of connections
+    connections1 = np.column_stack([nodesA, transposed_edges, np.zeros(len(nodesA))])
+    connections2 = np.column_stack([transposed_edges, nodesB, np.zeros(len(nodesB))])
+
+    # Combine and convert to list format
+    new_network_array = np.vstack([connections1, connections2])
+    new_network = new_network_array.astype(int).tolist()
+
+    # Update identity_dict efficiently
+    # Add missing nodes
+    for node in allnodes:
+        if node not in identity_dict:
+            identity_dict[node] = 'Node'
+
+    # Add all edges at once
+    for edge in transposed_edges.tolist():
+        identity_dict[edge] = 'Edge'
+
+    # Handle output
     if type(network) == str:
-
         save_singval_dict(identity_dict, 'NodeID', 'Identity', 'edge_to_node_identities.xlsx')
-
         nettracer.create_and_save_dataframe(new_network, 'edge-node_network.xlsx')
-
     else:
-
         df = nettracer.create_and_save_dataframe(new_network)
         return df, identity_dict, maxnode
 
 
+
 def save_singval_dict(dict, index_name, valname, filename):
     # Convert dictionary to DataFrame
     df = pd.DataFrame.from_dict(dict, orient='index', columns=[valname])
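The prune_samenode_connections and isolate_internode_connections rewrites share one pattern: resolve the per-row dictionary lookups into arrays once, build a boolean keep-mask, and index every column with it, instead of deleting from three parallel lists in a reverse loop. edge_to_node takes a different vectorized route, offsetting every edge ID past the largest node ID and splicing each edge in as an intermediate node between its two endpoints. A small self-contained sketch of that transposition step with toy values (the vectorized version stacks all A-edge rows before all edge-B rows, so row order differs from the old interleaved loop, but the connections are the same):

    import numpy as np

    nodesA = np.array([1, 2])
    nodesB = np.array([2, 3])
    edgesC = np.array([1, 2])  # edge IDs may collide with node IDs

    maxnode = int(np.max(np.concatenate([nodesA, nodesB])))  # 3
    transposed_edges = edgesC + maxnode                      # [4 5], now disjoint from node IDs

    # Each row (A, B) linked by edge E becomes two rows: A-E and E-B.
    connections1 = np.column_stack([nodesA, transposed_edges, np.zeros(len(nodesA))])
    connections2 = np.column_stack([transposed_edges, nodesB, np.zeros(len(nodesB))])
    new_network = np.vstack([connections1, connections2]).astype(int).tolist()

    print(new_network)  # [[1, 4, 0], [2, 5, 0], [4, 2, 0], [5, 3, 0]]

One caveat worth noting: both the edgesC + maxnode offset and the final astype(int) assume numeric node and edge IDs throughout.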
@@ -1111,86 +1091,98 @@ def save_singval_dict(dict, index_name, valname, filename):
 
 
 def rand_net_weighted(num_rows, num_nodes, nodes):
-
-    random_network = []
-    k = 0
-
-    while k < num_rows:
-
-        for i in range(0, num_nodes):
-            random_partner = random.randint(0, len(nodes)-1)
-            random_partner = nodes[random_partner]
-            if random_partner == nodes[i]:
-                while random_partner == nodes[i]:
-                    random_partner = random.randint(0, len(nodes)-1)
-                    random_partner = nodes[random_partner]
-            random_pair = [nodes[i], random_partner, 0]
-            random_network.append(random_pair)
-            k+= 1
-            if k == num_rows:
-                break
-        random.shuffle(nodes)
-
-
+    """Optimized weighted random network generation - allows duplicate edges"""
+    nodes_array = np.array(nodes)
+    n_nodes = len(nodes)
+
+    # Pre-generate all random indices at once
+    node_indices = np.random.randint(0, n_nodes, num_rows)
+    partner_indices = np.random.randint(0, n_nodes, num_rows)
+
+    # Fix self-connections by regenerating only where needed
+    self_connection_mask = node_indices == partner_indices
+    while np.any(self_connection_mask):
+        partner_indices[self_connection_mask] = np.random.randint(0, n_nodes, np.sum(self_connection_mask))
+        self_connection_mask = node_indices == partner_indices
+
+    # Create network efficiently using vectorized operations
+    random_network = np.column_stack([
+        nodes_array[node_indices],
+        nodes_array[partner_indices],
+        np.zeros(num_rows, dtype=int)
+    ]).tolist()
+
     df = nettracer.create_and_save_dataframe(random_network)
-
     G, edge_weights = weighted_network(df)
-
     return G, df
 
+
 def rand_net(num_rows, num_nodes, nodes):
+    """Optimized unweighted random network generation - prevents duplicate edges"""
     random_network = []
-    k=0
-
-    while k < num_rows:
-        for i in range(num_nodes):
-            # Generate a new random partner until it's valid
-            while True:
-                random_partner_index = random.randint(0, len(nodes) - 1)
-                random_partner = nodes[random_partner_index]
+    seen_edges = set()
+    nodes_set = set(nodes)
+    n_nodes = len(nodes)
+
+    # Pre-calculate maximum possible unique edges
+    max_possible_edges = n_nodes * (n_nodes - 1) // 2  # No self-connections; each undirected pair counts once
+
+    if num_rows > max_possible_edges:
+        raise ValueError(f"Cannot generate {num_rows} unique edges with {n_nodes} nodes. Maximum possible: {max_possible_edges}")
+
+    attempts = 0
+    max_attempts = num_rows * 10  # Prevent infinite loops
+
+    while len(random_network) < num_rows and attempts < max_attempts:
+        # Generate batch of random pairs
+        batch_size = min(1000, num_rows - len(random_network))
+
+        node_indices = np.random.randint(0, n_nodes, batch_size)
+        partner_indices = np.random.randint(0, n_nodes, batch_size)
+
+        for i in range(batch_size):
+            node_idx = node_indices[i]
+            partner_idx = partner_indices[i]
+
+            # Skip self-connections
+            if node_idx == partner_idx:
+                attempts += 1
+                continue
 
-                # Check if the random partner is different from the current node
-                # and if the pair is not already in the network
-                if random_partner != nodes[i] and [nodes[i], random_partner, 0] not in random_network and [random_partner, nodes[i], 0] not in random_network:
-                    break
-
-            random_pair = [nodes[i], random_partner, 0]
-            random_network.append(random_pair)
-            k += 1
-
-            if k == num_rows:
+            node = nodes[node_idx]
+            partner = nodes[partner_idx]
+
+            # Create normalized edge tuple to check for duplicates (both directions)
+            edge_tuple = tuple(sorted([node, partner]))
+
+            if edge_tuple not in seen_edges:
+                seen_edges.add(edge_tuple)
+                random_network.append([node, partner, 0])
+
+            attempts += 1
+
+            if len(random_network) >= num_rows:
                 break
-
-
-        # Shuffle nodes for the next iteration
-        random.shuffle(nodes)
-
-
-
+
+    if len(random_network) < num_rows:
+        print(f"Warning: Only generated {len(random_network)} edges out of requested {num_rows}")
+
     df = nettracer.create_and_save_dataframe(random_network)
-
     G, edge_weights = weighted_network(df)
-
     return G, df
 
-def generate_random(G, net_lists, weighted = True):
 
+def generate_random(G, net_lists, weighted=True):
+    """Optimized random network generation dispatcher"""
     nodes = list(G.nodes)
-
     num_nodes = len(nodes)
-
     num_rows = len(net_lists[0])
-
-
+
     if weighted:
-
-        G = rand_net_weighted(num_rows, num_nodes, nodes)
-
+        return rand_net_weighted(num_rows, num_nodes, nodes)
     else:
+        return rand_net(num_rows, num_nodes, nodes)
 
-        G = rand_net(num_rows, num_nodes, nodes)
-
-    return G
 
 def list_trim(list1, list2, component):
 
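Note on the rand_net rewrite: instead of testing each candidate pair against the whole network list (a linear scan per draw), it draws index pairs in batches, rejects self-loops, and deduplicates through a set of sorted tuples, so each membership check is O(1). A minimal sketch of that sampling strategy under the same assumptions (hashable, orderable node labels); sample_unique_edges is a hypothetical name, and it uses numpy's Generator API rather than the module-level np.random calls in the package:

    import numpy as np

    def sample_unique_edges(nodes, num_rows, batch_size=1000, rng=None):
        rng = rng or np.random.default_rng()
        n = len(nodes)
        # An undirected simple graph on n nodes has at most n*(n-1)/2 edges.
        assert num_rows <= n * (n - 1) // 2, "requested more unique edges than exist"
        edges, seen = [], set()
        while len(edges) < num_rows:
            a = rng.integers(0, n, batch_size)
            b = rng.integers(0, n, batch_size)
            for i, j in zip(a, b):
                if i == j:
                    continue  # reject self-connections
                key = tuple(sorted((nodes[i], nodes[j])))  # direction-insensitive
                if key not in seen:
                    seen.add(key)
                    edges.append([nodes[i], nodes[j], 0])
                    if len(edges) == num_rows:
                        break
        return edges

    print(len(sample_unique_edges(list(range(10)), 20)))  # 20

Rejection sampling like this stays fast while the requested count is well below the maximum; close to the cap, most draws are duplicates, which is what the max_attempts guard in rand_net protects against.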
nettracer3d/node_draw.py CHANGED
@@ -201,18 +201,28 @@ def degree_draw(degree_dict, centroid_dict, nodes):
     return draw_array
 
 def degree_infect(degree_dict, nodes):
-
-    num_nodes = int(np.max(nodes))
-    return_nodes = nodes.copy()
-
-    for node in range(1, num_nodes + 1):
-        if node not in degree_dict:
-            continue
-        else:
-            idxs = np.argwhere(nodes == node)
-            for idx in idxs:
-                return_nodes[tuple(idx)] = degree_dict[node]
-
+    return_nodes = np.zeros_like(nodes)  # Start with all zeros
+
+    if not degree_dict:  # Handle empty dict
+        return return_nodes
+
+    # Create arrays for old and new values
+    old_vals = np.array(list(degree_dict.keys()))
+    new_vals = np.array(list(degree_dict.values()))
+
+    # Sort for searchsorted to work correctly
+    sort_idx = np.argsort(old_vals)
+    old_vals_sorted = old_vals[sort_idx]
+    new_vals_sorted = new_vals[sort_idx]
+
+    # Find which nodes exist in the dictionary
+    mask = np.isin(nodes, old_vals_sorted)
+
+    # Only process nodes that exist in the dictionary
+    if np.any(mask):
+        indices = np.searchsorted(old_vals_sorted, nodes[mask])
+        return_nodes[mask] = new_vals_sorted[indices]
+
     return return_nodes
 
 
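Note on the degree_infect rewrite: the per-label argwhere loop is replaced by one np.isin mask plus an np.searchsorted lookup into the sorted dictionary keys, relabeling the whole array in a single vectorized pass. One behavioral difference is visible in the diff itself: the result now starts from np.zeros_like, so labels missing from degree_dict become 0, whereas the old version copied the input and left them unchanged. A toy demonstration of the lookup pattern (hypothetical values, not from the package):

    import numpy as np

    labels = np.array([[0, 1, 2],
                       [2, 3, 1]])
    degree_dict = {1: 5, 2: 7}  # node ID -> degree; node 3 has no mapping

    old_vals = np.array(sorted(degree_dict))                 # [1 2]
    new_vals = np.array([degree_dict[k] for k in old_vals])  # [5 7]

    out = np.zeros_like(labels)
    mask = np.isin(labels, old_vals)               # True where a mapping exists
    idx = np.searchsorted(old_vals, labels[mask])  # position of each label in old_vals
    out[mask] = new_vals[idx]

    print(out)
    # [[0 5 7]
    #  [7 0 5]]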