codecarbon 3.0.0rc7__tar.gz → 3.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/PKG-INFO +1 -1
  2. codecarbon-3.0.2/codecarbon/_version.py +1 -0
  3. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/cli/main.py +21 -4
  4. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/api_client.py +26 -19
  5. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/units.py +4 -3
  6. {codecarbon-3.0.0rc7/codecarbon/data/hardware → codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder}/CPU_Create_Dataset.ipynb +87 -47
  7. codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/README.md +15 -0
  8. codecarbon-3.0.0rc7/codecarbon/data/hardware/AMD_CPU_desktop_laptop.csv → codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/amd_cpu_desktop_dataset.csv +36 -22
  9. codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/amd_cpu_scrapper.py +120 -0
  10. codecarbon-3.0.0rc7/codecarbon/data/hardware/AMD_Server_Processor_Specifications.csv → codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/amd_cpu_server_dataset.csv +33 -27
  11. codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/intel_cpu_ark_dataset.csv +2025 -0
  12. codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/intel_cpu_scrapper.py +284 -0
  13. codecarbon-3.0.2/codecarbon/data/hardware/cpu_dataset_builder/merge_scrapped_cpu_power.py +159 -0
  14. codecarbon-3.0.2/codecarbon/data/hardware/cpu_power.csv +4896 -0
  15. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/emissions_tracker.py +108 -34
  16. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/geography.py +3 -1
  17. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/ram.py +2 -2
  18. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/lock.py +1 -1
  19. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/file.py +1 -1
  20. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/pyproject.toml +3 -1
  21. codecarbon-3.0.0rc7/codecarbon/_version.py +0 -1
  22. codecarbon-3.0.0rc7/codecarbon/data/hardware/cpu_power.csv +0 -4043
  23. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/.gitignore +0 -0
  24. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/LICENSE +0 -0
  25. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/README.md +0 -0
  26. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/__init__.py +0 -0
  27. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/cli/__init__.py +0 -0
  28. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/cli/cli_utils.py +0 -0
  29. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/__init__.py +0 -0
  30. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/cloud.py +0 -0
  31. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/co2_signal.py +0 -0
  32. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/config.py +0 -0
  33. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/cpu.py +0 -0
  34. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/emissions.py +0 -0
  35. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/gpu.py +0 -0
  36. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/measure.py +0 -0
  37. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/powermetrics.py +0 -0
  38. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/rapl.py +0 -0
  39. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/resource_tracker.py +0 -0
  40. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/schemas.py +0 -0
  41. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/core/util.py +0 -0
  42. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/canada_provinces.geojson +0 -0
  43. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/cloud/impact.csv +0 -0
  44. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_EPYC_8024P_8C/compare_cpu_load_and_RAPL-all_cores-AMD_EPYC_8024P_8-Core_Processor-2025-01-14-tasks.csv +0 -0
  45. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_EPYC_8024P_8C/compare_cpu_load_and_RAPL-all_cores-AMD_EPYC_8024P_8-Core_Processor-2025-01-14.csv +0 -0
  46. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_EPYC_8024P_8C/compare_cpu_load_and_RAPL-some_cores-AMD_EPYC_8024P_8-Core_Processor-2025-01-14-tasks.csv +0 -0
  47. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_EPYC_8024P_8C/compare_cpu_load_and_RAPL-some_cores-AMD_EPYC_8024P_8-Core_Processor-2025-01-14.csv +0 -0
  48. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_Threadripper/compare_cpu_load_and_RAPL-all_cores-AMD_Ryzen_Threadripper_1950X_16-Core_Processor-2025-01-14.csv +0 -0
  49. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/AMD_Threadripper/compare_cpu_load_and_RAPL-some_cores-AMD_Ryzen_Threadripper_1950X_16-Core_Processor-2025-01-14.csv +0 -0
  50. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E3-1240/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14-tasks.csv +0 -0
  51. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E3-1240/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14.csv +0 -0
  52. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E3-1240/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14-tasks.csv +0 -0
  53. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E3-1240/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14.csv +0 -0
  54. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-1240/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14-tasks.csv +0 -0
  55. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-1240/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14.csv +0 -0
  56. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-1240/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14-tasks.csv +0 -0
  57. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-1240/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E3-1240_V2_@_3.40GHz-2025-01-14.csv +0 -0
  58. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-2620/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E5-2620_v3_@_2.40GHz-2025-01-14-tasks.csv +0 -0
  59. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-2620/compare_cpu_load_and_RAPL-all_cores-Intel(R)_Xeon(R)_CPU_E5-2620_v3_@_2.40GHz-2025-01-14.csv +0 -0
  60. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-2620/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E5-2620_v3_@_2.40GHz-2025-01-14-tasks.csv +0 -0
  61. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/hardware/cpu_load_profiling/E5-2620/compare_cpu_load_and_RAPL-some_cores-Intel(R)_Xeon(R)_CPU_E5-2620_v3_@_2.40GHz-2025-01-14.csv +0 -0
  62. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2016/canada_energy_mix.json +0 -0
  63. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2016/global_energy_mix-old.json +0 -0
  64. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2016/usa_emissions.json +0 -0
  65. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2020/01_get_world_carbon_intensity.ipynb +0 -0
  66. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2020/02_convert_csv_to_json.ipynb +0 -0
  67. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2020/03_add_eu_data.ipynb +0 -0
  68. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2020/eu-carbon-intensity-electricity.csv +0 -0
  69. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/2023-07-07-22-40-48.png +0 -0
  70. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/carbon_intensity_per_source.json +0 -0
  71. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/global_energy_mix.json +0 -0
  72. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/our_world_in_data.ipynb +0 -0
  73. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/data/private_infra/world_energy_mix.csv +0 -0
  74. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/__init__.py +0 -0
  75. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/hardware.py +0 -0
  76. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/logger.py +0 -0
  77. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/scheduler.py +0 -0
  78. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/external/task.py +0 -0
  79. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/input.py +0 -0
  80. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output.py +0 -0
  81. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/__init__.py +0 -0
  82. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/base_output.py +0 -0
  83. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/emissions_data.py +0 -0
  84. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/http.py +0 -0
  85. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/logger.py +0 -0
  86. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/metrics/__init__.py +0 -0
  87. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/metrics/logfire.py +0 -0
  88. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/metrics/metric_docs.py +0 -0
  89. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/output_methods/metrics/prometheus.py +0 -0
  90. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/__init__.py +0 -0
  91. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/assets/__init__.py +0 -0
  92. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/assets/car_icon.png +0 -0
  93. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/assets/house_icon.png +0 -0
  94. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/assets/tv_icon.png +0 -0
  95. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/carbonboard.py +0 -0
  96. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/carbonboard_on_api.py +0 -0
  97. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/components.py +0 -0
  98. {codecarbon-3.0.0rc7 → codecarbon-3.0.2}/codecarbon/viz/data.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codecarbon
3
- Version: 3.0.0rc7
3
+ Version: 3.0.2
4
4
  Project-URL: Homepage, https://codecarbon.io/
5
5
  Project-URL: Repository, https://github.com/mlco2/codecarbon
6
6
  Project-URL: Dashboard, http://dashboard.codecarbon.io/
@@ -0,0 +1 @@
1
+ __version__ = "3.0.2"
@@ -96,7 +96,7 @@ def show_config(path: Path = Path("./.codecarbon.config")) -> None:
96
96
  print(org)
97
97
  except Exception as e:
98
98
  raise ValueError(
99
- f"Your configuration is invalid, please run `codecarbon config --init` first! (error: {e})"
99
+ f"Your configuration is invalid, please verify your configuration file at {path}. To start from scratch, run `codecarbon config` and overwrite your configuration file. (error: {e})"
100
100
  )
101
101
 
102
102
 
@@ -107,9 +107,19 @@ def get_fief_auth():
107
107
 
108
108
 
109
109
  def _get_access_token():
110
- access_token_info = get_fief_auth().access_token_info()
111
- access_token = access_token_info["access_token"]
112
- return access_token
110
+ try:
111
+ access_token_info = get_fief_auth().access_token_info()
112
+ access_token = access_token_info["access_token"]
113
+ return access_token
114
+ except Exception as e:
115
+ raise ValueError(
116
+ f"Not able to retrieve the access token, please run `codecarbon login` first! (error: {e})"
117
+ )
118
+
119
+
120
+ def _get_id_token():
121
+ id_token = get_fief_auth()._tokens["id_token"]
122
+ return id_token
113
123
 
114
124
 
115
125
  @codecarbon.command(
@@ -128,6 +138,10 @@ def api_get():
128
138
  @codecarbon.command("login", short_help="Login to CodeCarbon")
129
139
  def login():
130
140
  get_fief_auth().authorize()
141
+ api = ApiClient(endpoint_url=API_URL) # TODO: get endpoint from config
142
+ id_token = _get_id_token()
143
+ api.set_access_token(id_token)
144
+ api.check_auth()
131
145
 
132
146
 
133
147
  def get_api_key(project_id: str):
@@ -214,6 +228,9 @@ def config():
214
228
  description=org_description,
215
229
  )
216
230
  organization = api.create_organization(organization=organization_create)
231
+ if organization is None:
232
+ print("Error creating organization")
233
+ return
217
234
  print(f"Created organization : {organization}")
218
235
  else:
219
236
  organization = [orga for orga in organizations if orga["name"] == org][0]
@@ -39,16 +39,20 @@ class ApiClient: # (AsyncClient)
39
39
 
40
40
  def __init__(
41
41
  self,
42
- # endpoint_url="https://api.codecarbon.io",
43
- endpoint_url="https://dash-dev.cleverapps.io/api", # beta API
42
+ endpoint_url="https://api.codecarbon.io",
44
43
  experiment_id=None,
45
44
  api_key=None,
46
45
  access_token=None,
47
46
  conf=None,
47
+ create_run_automatically=True,
48
48
  ):
49
49
  """
50
- :project_id: ID of the existing project
50
+ :endpoint_url: URL of the API endpoint
51
+ :experiment_id: ID of the experiment
51
52
  :api_key: Code Carbon API_KEY
53
+ :access_token: Code Carbon API access token
54
+ :conf: Metadata of the experiment
55
+ :create_run_automatically: If False, do not create a run. To use API in read only mode.
52
56
  """
53
57
  # super().__init__(base_url=endpoint_url) # (AsyncClient)
54
58
  self.url = endpoint_url
@@ -56,21 +60,12 @@ class ApiClient: # (AsyncClient)
56
60
  self.api_key = api_key
57
61
  self.conf = conf
58
62
  self.access_token = access_token
59
- if self.experiment_id is not None:
63
+ if self.experiment_id is not None and create_run_automatically:
60
64
  self._create_run(self.experiment_id)
61
- import warnings
62
-
63
- # FIXME: remove this warning in the future, once the release is created
64
- warnings.warn(
65
- "Beta API will be reworked, and some features will be removed. If you have data persisted through the API, please be warned that it will be erased with the next API release",
66
- DeprecationWarning,
67
- stacklevel=2,
68
- )
69
65
 
70
66
  def _get_headers(self):
71
67
  headers = {"Content-Type": "application/json"}
72
68
  if self.api_key:
73
- print(type(self.api_key))
74
69
  # set the x-api-token header
75
70
  headers["x-api-token"] = self.api_key
76
71
  elif self.access_token:
@@ -84,6 +79,18 @@ class ApiClient: # (AsyncClient)
84
79
  """
85
80
  self.access_token = token
86
81
 
82
+ def check_auth(self):
83
+ """
84
+ Check API access to user account
85
+ """
86
+ url = self.url + "/auth/check"
87
+ headers = self._get_headers()
88
+ r = requests.get(url=url, timeout=2, headers=headers)
89
+ if r.status_code != 200:
90
+ self._log_error(url, {}, r)
91
+ return None
92
+ return r.json()
93
+
87
94
  def get_list_organizations(self):
88
95
  """
89
96
  List all organizations
@@ -192,9 +199,8 @@ class ApiClient: # (AsyncClient)
192
199
  def add_emission(self, carbon_emission: dict):
193
200
  assert self.experiment_id is not None
194
201
  if self.run_id is None:
195
- # TODO : raise an Exception ?
196
- logger.debug(
197
- "ApiClient.add_emission need a run_id : the initial call may "
202
+ logger.warning(
203
+ "ApiClient.add_emission() need a run_id : the initial call may "
198
204
  + "have failed. Retrying..."
199
205
  )
200
206
  self._create_run(self.experiment_id)
@@ -236,14 +242,15 @@ class ApiClient: # (AsyncClient)
236
242
  return False
237
243
  return True
238
244
 
239
- def _create_run(self, experiment_id):
245
+ def _create_run(self, experiment_id: str):
240
246
  """
241
247
  Create the experiment for project_id
242
- # TODO : Allow to give an existing experiment_id
243
248
  """
244
249
  if self.experiment_id is None:
245
250
  # TODO : raise an Exception ?
246
- logger.error("ApiClient FATAL The API _create_run needs an experiment_id !")
251
+ logger.error(
252
+ "ApiClient FATAL The ApiClient._create_run() needs an experiment_id !"
253
+ )
247
254
  return None
248
255
  try:
249
256
  run = RunCreate(
@@ -61,6 +61,9 @@ class Energy:
61
61
 
62
62
  kWh: float = field(compare=True)
63
63
 
64
+ def __post_init__(self):
65
+ self.kWh = float(self.kWh)
66
+
64
67
  @classmethod
65
68
  def from_power_and_time(cls, *, power: "Power", time: "Time") -> "Energy":
66
69
  assert isinstance(power.kW, float)
@@ -134,10 +137,8 @@ class Power:
134
137
  Returns:
135
138
  Power: Resulting Power estimation
136
139
  """
137
- delta_energy = abs(e2.kWh - e1.kWh)
138
- assert isinstance(delta_energy, float)
140
+ delta_energy = float(abs(e2.kWh - e1.kWh))
139
141
  kW = delta_energy / delay.hours if delay.hours != 0.0 else 0.0
140
- assert isinstance(delta_energy, float)
141
142
  return cls(kW=kW)
142
143
 
143
144
  @classmethod
@@ -357,7 +357,7 @@
357
357
  "\n",
358
358
  "url = \"https://www.intel.com/content/www/us/en/ark/products/series/236644/5th-gen-intel-xeon-scalable-processors.html\"\n",
359
359
  "response = requests.get(url)\n",
360
- "soup = BeautifulSoup(response.text, 'html.parser')"
360
+ "soup = BeautifulSoup(response.text, \"html.parser\")"
361
361
  ]
362
362
  },
363
363
  {
@@ -454,25 +454,26 @@
454
454
  ],
455
455
  "source": [
456
456
  "# Find the table\n",
457
- "table = soup.find('table', id='product-table')\n",
457
+ "table = soup.find(\"table\", id=\"product-table\")\n",
458
458
  "\n",
459
459
  "# Extract headers\n",
460
460
  "headers = []\n",
461
- "for th in table.find_all('th'):\n",
462
- " header_text = th.find('div', class_='header-text-space').text.strip()\n",
461
+ "for th in table.find_all(\"th\"):\n",
462
+ " header_text = th.find(\"div\", class_=\"header-text-space\").text.strip()\n",
463
463
  " headers.append(header_text)\n",
464
464
  "\n",
465
465
  "# Extract rows\n",
466
466
  "data = []\n",
467
- "for row in table.find('tbody').find_all('tr'):\n",
467
+ "for row in table.find(\"tbody\").find_all(\"tr\"):\n",
468
468
  " row_data = {}\n",
469
- " cells = row.find_all('td')\n",
469
+ " cells = row.find_all(\"td\")\n",
470
470
  " for i, cell in enumerate(cells):\n",
471
471
  " row_data[headers[i]] = cell.text.strip()\n",
472
472
  " data.append(row_data)\n",
473
473
  "\n",
474
474
  "# Convert to pandas DataFrame for easy viewing/export\n",
475
475
  "import pandas as pd\n",
476
+ "\n",
476
477
  "df = pd.DataFrame(data)\n",
477
478
  "print(df)"
478
479
  ]
@@ -526,7 +527,7 @@
526
527
  }
527
528
  ],
528
529
  "source": [
529
- "df['TDP'].replace(\"W\", \"\")"
530
+ "df[\"TDP\"].replace(\"W\", \"\")"
530
531
  ]
531
532
  },
532
533
  {
@@ -554,24 +555,28 @@
554
555
  "\n",
555
556
  "# Path to your manually exported AMD CPU dataset.\n",
556
557
  "# (Adjust the file path as needed.)\n",
557
- "amd_csv_path = './AMD_CPU_desktop_laptop.csv'\n",
558
+ "amd_csv_path = \"./AMD_CPU_desktop_laptop.csv\"\n",
558
559
  "\n",
559
560
  "try:\n",
560
561
  " amd_df = pd.read_csv(amd_csv_path)\n",
561
- " amd_df = amd_df[amd_df['Launch Date'].str.contains(\"2024|2025\", na=False)]\n",
562
- " amd_df = amd_df[amd_df['Form Factor'].str.contains(\"Desktops\", na=False)]\n",
563
- " \n",
562
+ " amd_df = amd_df[amd_df[\"Launch Date\"].str.contains(\"2024|2025\", na=False)]\n",
563
+ " amd_df = amd_df[amd_df[\"Form Factor\"].str.contains(\"Desktops\", na=False)]\n",
564
+ "\n",
564
565
  " # Convert columns to numeric, forcing errors to NaN\n",
565
566
  " # amd_df['Default TDP'] = amd_df['Default TDP'].str.replace('W', '').astype(float)\n",
566
- " amd_df['TDP'] = pd.to_numeric(amd_df['Default TDP'].str.replace('W', ''), errors='coerce')\n",
567
- " amd_df['# of Threads'] = pd.to_numeric(amd_df['# of Threads'], errors='coerce')\n",
568
- " \n",
567
+ " amd_df[\"TDP\"] = pd.to_numeric(\n",
568
+ " amd_df[\"Default TDP\"].str.replace(\"W\", \"\"), errors=\"coerce\"\n",
569
+ " )\n",
570
+ " amd_df[\"# of Threads\"] = pd.to_numeric(amd_df[\"# of Threads\"], errors=\"coerce\")\n",
571
+ "\n",
569
572
  " # It is assumed the CSV contains columns named 'TDP' (in Watts) and 'Total Cores'\n",
570
573
  " # Adjust the column names if they differ.\n",
571
- " amd_df['TDP_per_core'] = amd_df['TDP'] / amd_df['# of Threads']\n",
572
- " \n",
573
- " average_tdp_per_core = amd_df['TDP_per_core'].mean()\n",
574
- " print(\"Average TDP per core from AMD dataset: {:.2f} W\".format(average_tdp_per_core))\n",
574
+ " amd_df[\"TDP_per_core\"] = amd_df[\"TDP\"] / amd_df[\"# of Threads\"]\n",
575
+ "\n",
576
+ " average_tdp_per_core = amd_df[\"TDP_per_core\"].mean()\n",
577
+ " print(\n",
578
+ " \"Average TDP per core from AMD dataset: {:.2f} W\".format(average_tdp_per_core)\n",
579
+ " )\n",
575
580
  "except Exception as e:\n",
576
581
  " print(\"Error loading or processing AMD dataset:\", e)"
577
582
  ]
@@ -595,23 +600,27 @@
595
600
  "\n",
596
601
  "# Path to your manually exported AMD CPU dataset.\n",
597
602
  "# (Adjust the file path as needed.)\n",
598
- "amd_csv_path = './AMD_Server_Processor_Specifications.csv'\n",
603
+ "amd_csv_path = \"./AMD_Server_Processor_Specifications.csv\"\n",
599
604
  "\n",
600
605
  "try:\n",
601
606
  " amd_df = pd.read_csv(amd_csv_path)\n",
602
- " amd_df = amd_df[amd_df['Launch Date'].str.contains(\"2024|2025\", na=False)]\n",
603
- " \n",
607
+ " amd_df = amd_df[amd_df[\"Launch Date\"].str.contains(\"2024|2025\", na=False)]\n",
608
+ "\n",
604
609
  " # Convert columns to numeric, forcing errors to NaN\n",
605
610
  " # amd_df['Default TDP'] = amd_df['Default TDP'].str.replace('W', '').astype(float)\n",
606
- " amd_df['TDP'] = pd.to_numeric(amd_df['Default TDP'].str.replace('W', ''), errors='coerce')\n",
607
- " amd_df['# of Threads'] = pd.to_numeric(amd_df['# of Threads'], errors='coerce')\n",
608
- " \n",
611
+ " amd_df[\"TDP\"] = pd.to_numeric(\n",
612
+ " amd_df[\"Default TDP\"].str.replace(\"W\", \"\"), errors=\"coerce\"\n",
613
+ " )\n",
614
+ " amd_df[\"# of Threads\"] = pd.to_numeric(amd_df[\"# of Threads\"], errors=\"coerce\")\n",
615
+ "\n",
609
616
  " # It is assumed the CSV contains columns named 'TDP' (in Watts) and 'Total Cores'\n",
610
617
  " # Adjust the column names if they differ.\n",
611
- " amd_df['TDP_per_core'] = amd_df['TDP'] / amd_df['# of Threads']\n",
612
- " \n",
613
- " average_tdp_per_core = amd_df['TDP_per_core'].mean()\n",
614
- " print(\"Average TDP per core from AMD dataset: {:.2f} W\".format(average_tdp_per_core))\n",
618
+ " amd_df[\"TDP_per_core\"] = amd_df[\"TDP\"] / amd_df[\"# of Threads\"]\n",
619
+ "\n",
620
+ " average_tdp_per_core = amd_df[\"TDP_per_core\"].mean()\n",
621
+ " print(\n",
622
+ " \"Average TDP per core from AMD dataset: {:.2f} W\".format(average_tdp_per_core)\n",
623
+ " )\n",
615
624
  "except Exception as e:\n",
616
625
  " print(\"Error loading or processing AMD dataset:\", e)"
617
626
  ]
@@ -1055,7 +1064,7 @@
1055
1064
  }
1056
1065
  ],
1057
1066
  "source": [
1058
- "amd_df[['Name', 'TDP', '# of Threads', 'TDP_per_core', 'Launch Date']]"
1067
+ "amd_df[[\"Name\", \"TDP\", \"# of Threads\", \"TDP_per_core\", \"Launch Date\"]]"
1059
1068
  ]
1060
1069
  },
1061
1070
  {
@@ -1179,7 +1188,7 @@
1179
1188
  }
1180
1189
  ],
1181
1190
  "source": [
1182
- "df = pd.read_csv('cpu_power.csv')\n",
1191
+ "df = pd.read_csv(\"cpu_power.csv\")\n",
1183
1192
  "df"
1184
1193
  ]
1185
1194
  },
@@ -1246,12 +1255,14 @@
1246
1255
  }
1247
1256
  ],
1248
1257
  "source": [
1249
- "amd_csv_path = './AMD_Server_Processor_Specifications.csv'\n",
1258
+ "amd_csv_path = \"./AMD_Server_Processor_Specifications.csv\"\n",
1250
1259
  "amd_df = pd.read_csv(amd_csv_path)\n",
1251
- "amd_df['TDP'] = pd.to_numeric(amd_df['Default TDP'].str.replace('W', ''), errors='coerce')\n",
1252
- "amd_df['Name'] = amd_df['Name'].str.replace('™', '')\n",
1253
- "amd_server = amd_df[['Name', 'TDP' ]]\n",
1254
- "amd_server = amd_server.dropna(subset=['TDP'])\n",
1260
+ "amd_df[\"TDP\"] = pd.to_numeric(\n",
1261
+ " amd_df[\"Default TDP\"].str.replace(\"W\", \"\"), errors=\"coerce\"\n",
1262
+ ")\n",
1263
+ "amd_df[\"Name\"] = amd_df[\"Name\"].str.replace(\"™\", \"\")\n",
1264
+ "amd_server = amd_df[[\"Name\", \"TDP\"]]\n",
1265
+ "amd_server = amd_server.dropna(subset=[\"TDP\"])\n",
1255
1266
  "amd_server.head(3)"
1256
1267
  ]
1257
1268
  },
@@ -1406,11 +1417,13 @@
1406
1417
  ],
1407
1418
  "source": [
1408
1419
  "# Merge df with amd_server, avoiding duplicates\n",
1409
- "amd_server['TDP'] = amd_server['TDP'].astype(int).astype(str)\n",
1410
- "merged_df = df.merge(amd_server, on='Name', how='outer', suffixes=('', '_AMD'), indicator=True)\n",
1420
+ "amd_server[\"TDP\"] = amd_server[\"TDP\"].astype(int).astype(str)\n",
1421
+ "merged_df = df.merge(\n",
1422
+ " amd_server, on=\"Name\", how=\"outer\", suffixes=(\"\", \"_AMD\"), indicator=True\n",
1423
+ ")\n",
1411
1424
  "# Filter for new entries that are only in amd_server\n",
1412
- "new_cpus = merged_df[merged_df['_merge'] == 'right_only']\n",
1413
- "new_cpus['TDP'] = new_cpus['TDP_AMD']\n",
1425
+ "new_cpus = merged_df[merged_df[\"_merge\"] == \"right_only\"]\n",
1426
+ "new_cpus[\"TDP\"] = new_cpus[\"TDP_AMD\"]\n",
1414
1427
  "new_cpus"
1415
1428
  ]
1416
1429
  },
@@ -1528,11 +1541,9 @@
1528
1541
  }
1529
1542
  ],
1530
1543
  "source": [
1531
- "\n",
1532
- "\n",
1533
1544
  "# merged_df.query('Name.str.contains(\"EPYC\")')\n",
1534
- "new_cpus_to_add = new_cpus.drop(columns=['_merge']).loc[:, df.columns]\n",
1535
- "new_cpus_to_add\n"
1545
+ "new_cpus_to_add = new_cpus.drop(columns=[\"_merge\"]).loc[:, df.columns]\n",
1546
+ "new_cpus_to_add"
1536
1547
  ]
1537
1548
  },
1538
1549
  {
@@ -1588,7 +1599,7 @@
1588
1599
  "source": [
1589
1600
  "# Option 2: Append the new CPUs to the original df\n",
1590
1601
  "df = pd.concat([df, new_cpus_to_add], ignore_index=True)\n",
1591
- "df.sort_values('Name', ascending=True, inplace=True)\n",
1602
+ "df.sort_values(\"Name\", ascending=True, inplace=True)\n",
1592
1603
  "df.query('Name.str.contains(\"AMD EPYC 4124P\")')"
1593
1604
  ]
1594
1605
  },
@@ -1598,7 +1609,36 @@
1598
1609
  "metadata": {},
1599
1610
  "outputs": [],
1600
1611
  "source": [
1601
- "df.to_csv('cpu_power.csv', index=False)"
1612
+ "df.to_csv(\"cpu_power.csv\", index=False)"
1613
+ ]
1614
+ },
1615
+ {
1616
+ "cell_type": "markdown",
1617
+ "metadata": {},
1618
+ "source": [
1619
+ "# Remove with..."
1620
+ ]
1621
+ },
1622
+ {
1623
+ "cell_type": "code",
1624
+ "execution_count": 1,
1625
+ "metadata": {},
1626
+ "outputs": [],
1627
+ "source": [
1628
+ "import pandas as pd\n",
1629
+ "# Load the existing dataset\n",
1630
+ "df = pd.read_csv(\"../cpu_power.csv\")\n",
1631
+ "# Replace with re.sub(r\" with.*\", \"\", name)\n",
1632
+ "def clean_cpu_name(name):\n",
1633
+ " import re\n",
1634
+ " # Remove \"with\" and everything after it\n",
1635
+ " name = re.sub(r\" with.*\", \"\", name)\n",
1636
+ " # Remove \"™\" symbol\n",
1637
+ " name = name.replace(\"™\", \"\")\n",
1638
+ " return name.strip()\n",
1639
+ "df[\"Name\"] = df[\"Name\"].apply(clean_cpu_name)\n",
1640
+ "# Save the cleaned dataset\n",
1641
+ "df.to_csv(\"../cpu_power.csv\", index=False)"
1602
1642
  ]
1603
1643
  },
1604
1644
  {
@@ -1609,7 +1649,7 @@
1609
1649
  ],
1610
1650
  "metadata": {
1611
1651
  "kernelspec": {
1612
- "display_name": "codecarbon",
1652
+ "display_name": "3.10.5",
1613
1653
  "language": "python",
1614
1654
  "name": "python3"
1615
1655
  },
@@ -1623,7 +1663,7 @@
1623
1663
  "name": "python",
1624
1664
  "nbconvert_exporter": "python",
1625
1665
  "pygments_lexer": "ipython3",
1626
- "version": "3.12.3"
1666
+ "version": "3.10.5"
1627
1667
  }
1628
1668
  },
1629
1669
  "nbformat": 4,
@@ -0,0 +1,15 @@
1
+ # How to update the CPU database
2
+
3
+ To update the CPU database, you have to run:
4
+
5
+ ```bash
6
+ cd codecarbon/data/hardware/cpu_dataset_builder
7
+ hatch run pip install playwright beautifulsoup4
8
+ hatch run python intel_cpu_scrapper.py
9
+ hatch run python amd_cpu_scrapper.py
10
+ hatch run python merge_scrapped_cpu_power.py
11
+ ```
12
+
13
+ Then commit the changes to the CSV files.
14
+
15
+ CodeCarbon only uses the `cpu_power.csv` file, but we keep the other files for reference and so that others can use them if needed.